| author    | dim <dim@FreeBSD.org> | 2015-05-27 20:26:41 +0000 |
| committer | dim <dim@FreeBSD.org> | 2015-05-27 20:26:41 +0000 |
| commit    | 5ef8fd3549d38e883a31881636be3dc2a275de20 (patch) |
| tree      | bd13a22d9db57ccf3eddbc07b32c18109521d050 /contrib/llvm/lib/CodeGen |
| parent    | 77794ebe2d5718eb502c93ec32f8ccae4d8a0b7b (diff) |
| parent    | 782067d0278612ee75d024b9b135c221c327e9e8 (diff) |
Merge llvm trunk r238337 from ^/vendor/llvm/dist, resolve conflicts, and
preserve our customizations, where necessary.
Diffstat (limited to 'contrib/llvm/lib/CodeGen')
164 files changed, 19370 insertions, 12286 deletions
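Much of the mechanical churn in the AsmPrinter hunks below follows from a single ownership change: AsmPrinter's OutStreamer becomes a std::unique_ptr<MCStreamer> (note the new constructor signature and the dropped `delete &OutStreamer` in the destructor), so every `OutStreamer.` access is rewritten to `OutStreamer->`. A minimal sketch of that before/after pattern, using simplified stand-in types rather than the real LLVM classes:

    #include <memory>
    #include <utility>

    struct Streamer { void emit() {} };

    class Printer {
      // Was: Streamer &S;  -- a reference the destructor had to free by hand
      // with "delete &S".
      std::unique_ptr<Streamer> S;  // Now: owned outright, freed automatically.
    public:
      explicit Printer(std::unique_ptr<Streamer> St) : S(std::move(St)) {}
      void run() { S->emit(); }     // Was: S.emit();
    };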
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 69c3685..58b87e1 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -296,6 +296,16 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); + // FIXME: We must leave subregisters of live super registers as live, so that + // we don't clear out the register tracking information for subregisters of + // super registers we're still tracking (and with which we're unioning + // subregister definitions). + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI)) { + DEBUG(if (!header && footer) dbgs() << footer); + return; + } + if (!State->IsLive(Reg)) { KillIndices[Reg] = KillIdx; DefIndices[Reg] = ~0u; diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h index 12cf95b..18c8bb5 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -63,11 +63,11 @@ class RegisterClassInfo; /// Map registers to all their references within a live range. std::multimap<unsigned, RegisterReference> RegRefs; - /// The index of the most recent kill (proceding bottom-up), + /// The index of the most recent kill (proceeding bottom-up), /// or ~0u if the register is not live. std::vector<unsigned> KillIndices; - /// The index of the most recent complete def (proceding bottom + /// The index of the most recent complete def (proceeding bottom /// up), or ~0u if the register is live. std::vector<unsigned> DefIndices; @@ -127,7 +127,7 @@ class RegisterClassInfo; AggressiveAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI, TargetSubtargetInfo::RegClassVector& CriticalPathRCs); - ~AggressiveAntiDepBreaker(); + ~AggressiveAntiDepBreaker() override; /// Initialize anti-dep breaking for a new basic block. void StartBlock(MachineBasicBlock *BB) override; diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index 2e8af9e..3224fac 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -295,8 +295,8 @@ static const Value *getNoopInput(const Value *V, } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) { // Value may come from either the aggregate or the scalar ArrayRef<unsigned> InsertLoc = IVI->getIndices(); - if (std::equal(InsertLoc.rbegin(), InsertLoc.rend(), - ValLoc.rbegin())) { + if (ValLoc.size() >= InsertLoc.size() && + std::equal(InsertLoc.begin(), InsertLoc.end(), ValLoc.rbegin())) { // The type being inserted is a nested sub-type of the aggregate; we // have to remove those initial indices to get the location we're // interested in for the operand. @@ -312,8 +312,7 @@ static const Value *getNoopInput(const Value *V, // previous aggregate. Combine the two paths to obtain the true address of // our element. ArrayRef<unsigned> ExtractLoc = EVI->getIndices(); - std::copy(ExtractLoc.rbegin(), ExtractLoc.rend(), - std::back_inserter(ValLoc)); + ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend()); NoopInput = Op; } // Terminate if we couldn't find anything to look through. 
@@ -518,8 +517,9 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { return false; } + const Function *F = ExitBB->getParent(); return returnTypeIsEligibleForTailCall( - ExitBB->getParent(), I, Ret, *TM.getSubtargetImpl()->getTargetLowering()); + F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering()); } bool llvm::returnTypeIsEligibleForTailCall(const Function *F, @@ -600,10 +600,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, // The manipulations performed when we're looking through an insertvalue or // an extractvalue would happen at the front of the RetPath list, so since // we have to copy it anyway it's more efficient to create a reversed copy. - using std::copy; - SmallVector<unsigned, 4> TmpRetPath, TmpCallPath; - copy(RetPath.rbegin(), RetPath.rend(), std::back_inserter(TmpRetPath)); - copy(CallPath.rbegin(), CallPath.rend(), std::back_inserter(TmpCallPath)); + SmallVector<unsigned, 4> TmpRetPath(RetPath.rbegin(), RetPath.rend()); + SmallVector<unsigned, 4> TmpCallPath(CallPath.rbegin(), CallPath.rend()); // Finally, we can check whether the value produced by the tail call at this // index is compatible with the value we return. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index 66c6c63..4cb460a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -36,13 +36,12 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -ARMException::ARMException(AsmPrinter *A) - : EHStreamer(A), shouldEmitCFI(false) {} +ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {} ARMException::~ARMException() {} ARMTargetStreamer &ARMException::getTargetStreamer() { - MCTargetStreamer &TS = *Asm->OutStreamer.getTargetStreamer(); + MCTargetStreamer &TS = *Asm->OutStreamer->getTargetStreamer(); return static_cast<ARMTargetStreamer &>(TS); } @@ -50,48 +49,35 @@ ARMTargetStreamer &ARMException::getTargetStreamer() { /// content. void ARMException::endModule() { if (shouldEmitCFI) - Asm->OutStreamer.EmitCFISections(false, true); + Asm->OutStreamer->EmitCFISections(false, true); } -/// beginFunction - Gather pre-function exception information. Assumes it's -/// being emitted immediately after the function entry point. void ARMException::beginFunction(const MachineFunction *MF) { if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) getTargetStreamer().emitFnStart(); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); // See if we need call frame info. AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); assert(MoveType != AsmPrinter::CFI_M_EH && "non-EH CFI not yet supported in prologue with EHABI lowering"); if (MoveType == AsmPrinter::CFI_M_Debug) { shouldEmitCFI = true; - Asm->OutStreamer.EmitCFIStartProc(false); + Asm->OutStreamer->EmitCFIStartProc(false); } } /// endFunction - Gather and emit post-function exception information. /// -void ARMException::endFunction(const MachineFunction *) { - if (shouldEmitCFI) - Asm->OutStreamer.EmitCFIEndProc(); - - // Map all labels and get rid of any dead landing pads. 
- MMI->TidyLandingPads(); - +void ARMException::endFunction(const MachineFunction *MF) { ARMTargetStreamer &ATS = getTargetStreamer(); if (!Asm->MF->getFunction()->needsUnwindTableEntry() && MMI->getLandingPads().empty()) ATS.emitCantUnwind(); else { - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", - Asm->getFunctionNumber())); if (!MMI->getLandingPads().empty()) { // Emit references to personality. - if (const Function * Personality = - MMI->getPersonalities()[MMI->getPersonalityIndex()]) { + if (const Function *Personality = MMI->getPersonality()) { MCSymbol *PerSym = Asm->getSymbol(Personality); - Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global); + Asm->OutStreamer->EmitSymbolAttribute(PerSym, MCSA_Global); ATS.emitPersonality(PerSym); } @@ -111,13 +97,13 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) { const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); - bool VerboseAsm = Asm->OutStreamer.isVerboseAsm(); + bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); int Entry = 0; // Emit the Catch TypeInfos. if (VerboseAsm && !TypeInfos.empty()) { - Asm->OutStreamer.AddComment(">> Catch TypeInfos <<"); - Asm->OutStreamer.AddBlankLine(); + Asm->OutStreamer->AddComment(">> Catch TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); Entry = TypeInfos.size(); } @@ -125,14 +111,14 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) { I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalValue *GV = *I; if (VerboseAsm) - Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--)); + Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); Asm->EmitTTypeReference(GV, TTypeEncoding); } // Emit the Exception Specifications. if (VerboseAsm && !FilterIds.empty()) { - Asm->OutStreamer.AddComment(">> Filter TypeInfos <<"); - Asm->OutStreamer.AddBlankLine(); + Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); Entry = 0; } for (std::vector<unsigned>::const_iterator @@ -141,7 +127,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) { if (VerboseAsm) { --Entry; if (TypeID != 0) - Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry)); + Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry)); } Asm->EmitTTypeReference((TypeID == 0 ? nullptr : TypeInfos[TypeID - 1]), diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp index 8dab5e5..2487aba 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -24,12 +24,12 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) { } // Emit addresses into the section given. -void AddressPool::emit(AsmPrinter &Asm, const MCSection *AddrSection) { +void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) { if (Pool.empty()) return; // Start the dwarf addr section. 
- Asm.OutStreamer.SwitchSection(AddrSection); + Asm.OutStreamer->SwitchSection(AddrSection); // Order the address pool entries by ID SmallVector<const MCExpr *, 64> Entries(Pool.size()); @@ -41,5 +41,5 @@ void AddressPool::emit(AsmPrinter &Asm, const MCSection *AddrSection) { : MCSymbolRefExpr::Create(I.first, Asm.OutContext); for (const MCExpr *Entry : Entries) - Asm.OutStreamer.EmitValue(Entry, Asm.getDataLayout().getPointerSize()); + Asm.OutStreamer->EmitValue(Entry, Asm.getDataLayout().getPointerSize()); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h index 802e050..211fc98 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h @@ -40,7 +40,7 @@ public: /// label/symbol. unsigned getIndex(const MCSymbol *Sym, bool TLS = false); - void emit(AsmPrinter &Asm, const MCSection *AddrSection); + void emit(AsmPrinter &Asm, MCSection *AddrSection); bool isEmpty() { return Pool.empty(); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index bbed808..206be70 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -28,7 +28,7 @@ #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Mangler.h" @@ -41,9 +41,11 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Timer.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" @@ -75,11 +77,11 @@ static gcp_map_type &getGCMap(void *&P) { /// getGVAlignmentLog2 - Return the alignment to use for the specified global /// value in log2 form. This rounds up to the preferred alignment if possible /// and legal. -static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD, +static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL, unsigned InBits = 0) { unsigned NumBits = 0; if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) - NumBits = TD.getPreferredAlignmentLog(GVar); + NumBits = DL.getPreferredAlignmentLog(GVar); // If InBits is specified, round it to it. 
if (InBits > NumBits) @@ -98,15 +100,19 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD, return NumBits; } -AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) +AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer) : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()), - MII(tm.getSubtargetImpl()->getInstrInfo()), - OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(nullptr), - LastFn(0), Counter(~0U), SetCounter(0) { - DD = nullptr; MMI = nullptr; LI = nullptr; MF = nullptr; - CurrentFnSym = CurrentFnSymForSize = nullptr; + OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)), + LastMI(nullptr), LastFn(0), Counter(~0U) { + DD = nullptr; + MMI = nullptr; + LI = nullptr; + MF = nullptr; + CurExceptionSym = CurrentFnSym = CurrentFnSymForSize = nullptr; + CurrentFnBegin = nullptr; + CurrentFnEnd = nullptr; GCMetadataPrinters = nullptr; - VerboseAsm = Streamer.isVerboseAsm(); + VerboseAsm = OutStreamer->isVerboseAsm(); } AsmPrinter::~AsmPrinter() { @@ -118,8 +124,6 @@ AsmPrinter::~AsmPrinter() { delete &GCMap; GCMetadataPrinters = nullptr; } - - delete &OutStreamer; } /// getFunctionNumber - Return a unique ID for the current function. @@ -129,16 +133,17 @@ unsigned AsmPrinter::getFunctionNumber() const { } const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { - return TM.getSubtargetImpl()->getTargetLowering()->getObjFileLowering(); + return *TM.getObjFileLowering(); } /// getDataLayout - Return information about data layout. const DataLayout &AsmPrinter::getDataLayout() const { - return *TM.getSubtargetImpl()->getDataLayout(); + return *TM.getDataLayout(); } const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const { - return TM.getSubtarget<MCSubtargetInfo>(); + assert(MF && "getSubtargetInfo requires a valid MachineFunction!"); + return MF->getSubtarget<MCSubtargetInfo>(); } void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { @@ -151,7 +156,7 @@ StringRef AsmPrinter::getTargetTriple() const { /// getCurrentSection() - Return the current section we are emitting to. const MCSection *AsmPrinter::getCurrentSection() const { - return OutStreamer.getCurrentSection().first; + return OutStreamer->getCurrentSection().first; } @@ -173,9 +178,9 @@ bool AsmPrinter::doInitialization(Module &M) { const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) .Initialize(OutContext, TM); - OutStreamer.InitSections(false); + OutStreamer->InitSections(false); - Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout()); + Mang = new Mangler(TM.getDataLayout()); // Emit the version-min deplyment target directive if needed. // @@ -191,9 +196,9 @@ bool AsmPrinter::doInitialization(Module &M) { TT.getOSVersion(Major, Minor, Update); // If there is a version specified, Major will be non-zero. if (Major) - OutStreamer.EmitVersionMin((TT.isMacOSX() ? - MCVM_OSXVersionMin : MCVM_IOSVersionMin), - Major, Minor, Update); + OutStreamer->EmitVersionMin((TT.isMacOSX() ? + MCVM_OSXVersionMin : MCVM_IOSVersionMin), + Major, Minor, Update); } // Allow the target to emit any magic that it wants at the start of the file. @@ -203,7 +208,7 @@ bool AsmPrinter::doInitialization(Module &M) { // don't, this at least helps the user find where a global came from. 
if (MAI->hasSingleParameterDotFile()) { // .file "foo.c" - OutStreamer.EmitFileDirective(M.getModuleIdentifier()); + OutStreamer->EmitFileDirective(M.getModuleIdentifier()); } GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); @@ -214,11 +219,15 @@ bool AsmPrinter::doInitialization(Module &M) { // Emit module-level inline asm if it exists. if (!M.getModuleInlineAsm().empty()) { - OutStreamer.AddComment("Start of file scope inline assembly"); - OutStreamer.AddBlankLine(); - EmitInlineAsm(M.getModuleInlineAsm()+"\n"); - OutStreamer.AddComment("End of file scope inline assembly"); - OutStreamer.AddBlankLine(); + // We're at the module level. Construct MCSubtarget from the default CPU + // and target triple. + std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( + TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString())); + OutStreamer->AddComment("Start of file scope inline assembly"); + OutStreamer->AddBlankLine(); + EmitInlineAsm(M.getModuleInlineAsm()+"\n", *STI, TM.Options.MCOptions); + OutStreamer->AddComment("End of file scope inline assembly"); + OutStreamer->AddBlankLine(); } if (MAI->doesSupportDebugInformation()) { @@ -254,10 +263,11 @@ bool AsmPrinter::doInitialization(Module &M) { case ExceptionHandling::ARM: ES = new ARMException(this); break; - case ExceptionHandling::ItaniumWinEH: - case ExceptionHandling::MSVC: + case ExceptionHandling::WinEH: switch (MAI->getWinEHEncodingType()) { default: llvm_unreachable("unsupported unwinding information encoding"); + case WinEH::EncodingType::Invalid: + break; case WinEH::EncodingType::Itanium: ES = new Win64Exception(this); break; @@ -286,20 +296,20 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { case GlobalValue::WeakODRLinkage: if (MAI->hasWeakDefDirective()) { // .globl _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); if (!canBeHidden(GV, *MAI)) // .weak_definition _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); else - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate); } else if (MAI->hasLinkOnceDirective()) { // .globl _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); //NOTE: linkonce is handled by the section the symbol was assigned to. } else { // .weak _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Weak); } return; case GlobalValue::AppendingLinkage: @@ -308,7 +318,7 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { case GlobalValue::ExternalLinkage: // If external or appending, declare as a global symbol. // .globl _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); return; case GlobalValue::PrivateLinkage: case GlobalValue::InternalLinkage: @@ -337,10 +347,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (EmitSpecialLLVMGlobal(GV)) return; + // Skip the emission of global equivalents. The symbol can be emitted later + // on by emitGlobalGOTEquivs in case it turns out to be needed. 
+ if (GlobalGOTEquivs.count(getSymbol(GV))) + return; + if (isVerbose()) { - GV->printAsOperand(OutStreamer.GetCommentOS(), + GV->printAsOperand(OutStreamer->GetCommentOS(), /*PrintType=*/false, GV->getParent()); - OutStreamer.GetCommentOS() << '\n'; + OutStreamer->GetCommentOS() << '\n'; } } @@ -356,11 +371,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { "' is already defined"); if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = TM.getDataLayout(); uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType()); // If the alignment is specified, we *must* obey it. Overaligning a global @@ -384,16 +399,16 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { Align = 0; // .comm _foo, 42, 4 - OutStreamer.EmitCommonSymbol(GVSym, Size, Align); + OutStreamer->EmitCommonSymbol(GVSym, Size, Align); return; } // Handle local BSS symbols. if (MAI->hasMachoZeroFillDirective()) { - const MCSection *TheSection = - getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); + MCSection *TheSection = + getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); // .zerofill __DATA, __bss, _foo, 400, 5 - OutStreamer.EmitZerofill(TheSection, GVSym, Size, Align); + OutStreamer->EmitZerofill(TheSection, GVSym, Size, Align); return; } @@ -405,7 +420,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Prefer to simply fall back to .local / .comm in this case. if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) { // .lcomm _foo, 42 - OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align); + OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Align); return; } @@ -413,14 +428,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { Align = 0; // .local _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Local); // .comm _foo, 42, 4 - OutStreamer.EmitCommonSymbol(GVSym, Size, Align); + OutStreamer->EmitCommonSymbol(GVSym, Size, Align); return; } - const MCSection *TheSection = - getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); + MCSection *TheSection = + getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); // Handle the zerofill directive on darwin, which is a special form of BSS // emission. @@ -428,9 +443,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. 
// .globl _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); // .zerofill __DATA, __common, _foo, 400, 5 - OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); + OutStreamer->EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); return; } @@ -447,59 +462,58 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { // Emit the .tbss symbol MCSymbol *MangSym = - OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); + OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); if (GVKind.isThreadBSS()) { TheSection = getObjFileLowering().getTLSBSSSection(); - OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); + OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); } else if (GVKind.isThreadData()) { - OutStreamer.SwitchSection(TheSection); + OutStreamer->SwitchSection(TheSection); EmitAlignment(AlignLog, GV); - OutStreamer.EmitLabel(MangSym); + OutStreamer->EmitLabel(MangSym); EmitGlobalConstant(GV->getInitializer()); } - OutStreamer.AddBlankLine(); + OutStreamer->AddBlankLine(); // Emit the variable struct for the runtime. - const MCSection *TLVSect - = getObjFileLowering().getTLSExtraDataSection(); + MCSection *TLVSect = getObjFileLowering().getTLSExtraDataSection(); - OutStreamer.SwitchSection(TLVSect); + OutStreamer->SwitchSection(TLVSect); // Emit the linkage here. EmitLinkage(GV, GVSym); - OutStreamer.EmitLabel(GVSym); + OutStreamer->EmitLabel(GVSym); // Three pointers in size: // - __tlv_bootstrap - used to make sure support exists // - spare pointer, used when mapped by the runtime // - pointer to mangled symbol above with initializer unsigned PtrSize = DL->getPointerTypeSize(GV->getType()); - OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), + OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), PtrSize); - OutStreamer.EmitIntValue(0, PtrSize); - OutStreamer.EmitSymbolValue(MangSym, PtrSize); + OutStreamer->EmitIntValue(0, PtrSize); + OutStreamer->EmitSymbolValue(MangSym, PtrSize); - OutStreamer.AddBlankLine(); + OutStreamer->AddBlankLine(); return; } - OutStreamer.SwitchSection(TheSection); + OutStreamer->SwitchSection(TheSection); EmitLinkage(GV, GVSym); EmitAlignment(AlignLog, GV); - OutStreamer.EmitLabel(GVSym); + OutStreamer->EmitLabel(GVSym); EmitGlobalConstant(GV->getInitializer()); if (MAI->hasDotTypeDotSizeDirective()) // .size foo, 42 - OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext)); + OutStreamer->EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext)); - OutStreamer.AddBlankLine(); + OutStreamer->AddBlankLine(); } /// EmitFunctionHeader - This method emits the header for the current @@ -511,20 +525,21 @@ void AsmPrinter::EmitFunctionHeader() { // Print the 'header' of function. 
const Function *F = MF->getFunction(); - OutStreamer.SwitchSection( + OutStreamer->SwitchSection( getObjFileLowering().SectionForGlobal(F, *Mang, TM)); EmitVisibility(CurrentFnSym, F->getVisibility()); EmitLinkage(F, CurrentFnSym); - EmitAlignment(MF->getAlignment(), F); + if (MAI->hasFunctionAlignment()) + EmitAlignment(MF->getAlignment(), F); if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); + OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); if (isVerbose()) { - F->printAsOperand(OutStreamer.GetCommentOS(), + F->printAsOperand(OutStreamer->GetCommentOS(), /*PrintType=*/false, F->getParent()); - OutStreamer.GetCommentOS() << '\n'; + OutStreamer->GetCommentOS() << '\n'; } // Emit the prefix data. @@ -541,8 +556,19 @@ void AsmPrinter::EmitFunctionHeader() { std::vector<MCSymbol*> DeadBlockSyms; MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms); for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) { - OutStreamer.AddComment("Address taken block that was later removed"); - OutStreamer.EmitLabel(DeadBlockSyms[i]); + OutStreamer->AddComment("Address taken block that was later removed"); + OutStreamer->EmitLabel(DeadBlockSyms[i]); + } + + if (CurrentFnBegin) { + if (MAI->useAssignmentForEHBegin()) { + MCSymbol *CurPos = OutContext.createTempSymbol(); + OutStreamer->EmitLabel(CurPos); + OutStreamer->EmitAssignment(CurrentFnBegin, + MCSymbolRefExpr::Create(CurPos, OutContext)); + } else { + OutStreamer->EmitLabel(CurrentFnBegin); + } } // Emit pre-function debug and/or EH information. @@ -570,13 +596,13 @@ void AsmPrinter::EmitFunctionEntryLabel() { report_fatal_error("'" + Twine(CurrentFnSym->getName()) + "' label emitted multiple times to assembly file"); - return OutStreamer.EmitLabel(CurrentFnSym); + return OutStreamer->EmitLabel(CurrentFnSym); } /// emitComments - Pretty-print comments for instructions. static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { const MachineFunction *MF = MI.getParent()->getParent(); - const TargetMachine &TM = MF->getTarget(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); // Check for spills and reloads int FI; @@ -586,24 +612,20 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { // We assume a single instruction only has a spill or reload, not // both. 
const MachineMemOperand *MMO; - if (TM.getSubtargetImpl()->getInstrInfo()->isLoadFromStackSlotPostFE(&MI, - FI)) { + if (TII->isLoadFromStackSlotPostFE(&MI, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); CommentOS << MMO->getSize() << "-byte Reload\n"; } - } else if (TM.getSubtargetImpl()->getInstrInfo()->hasLoadFromStackSlot( - &MI, MMO, FI)) { + } else if (TII->hasLoadFromStackSlot(&MI, MMO, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Reload\n"; - } else if (TM.getSubtargetImpl()->getInstrInfo()->isStoreToStackSlotPostFE( - &MI, FI)) { + } else if (TII->isStoreToStackSlotPostFE(&MI, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); CommentOS << MMO->getSize() << "-byte Spill\n"; } - } else if (TM.getSubtargetImpl()->getInstrInfo()->hasStoreToStackSlot( - &MI, MMO, FI)) { + } else if (TII->hasStoreToStackSlot(&MI, MMO, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Spill\n"; } @@ -617,10 +639,9 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { /// that is an implicit def. void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { unsigned RegNo = MI->getOperand(0).getReg(); - OutStreamer.AddComment( - Twine("implicit-def: ") + - TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo)); - OutStreamer.AddBlankLine(); + OutStreamer->AddComment(Twine("implicit-def: ") + + MMI->getContext().getRegisterInfo()->getName(RegNo)); + OutStreamer->AddBlankLine(); } static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { @@ -629,11 +650,11 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { const MachineOperand &Op = MI->getOperand(i); assert(Op.isReg() && "KILL instruction must have only register operands"); Str += ' '; - Str += AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Op.getReg()); + Str += AP.MMI->getContext().getRegisterInfo()->getName(Op.getReg()); Str += (Op.isDef() ? "<def>" : "<kill>"); } - AP.OutStreamer.AddComment(Str); - AP.OutStreamer.AddBlankLine(); + AP.OutStreamer->AddComment(Str); + AP.OutStreamer->AddBlankLine(); } /// emitDebugValueComment - This method handles the target-independent form @@ -648,18 +669,18 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { raw_svector_ostream OS(Str); OS << "DEBUG_VALUE: "; - DIVariable V = MI->getDebugVariable(); - if (V.getContext().isSubprogram()) { - StringRef Name = DISubprogram(V.getContext()).getDisplayName(); + const DILocalVariable *V = MI->getDebugVariable(); + if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) { + StringRef Name = SP->getDisplayName(); if (!Name.empty()) OS << Name << ":"; } - OS << V.getName(); + OS << V->getName(); - DIExpression Expr = MI->getDebugExpression(); - if (Expr.isVariablePiece()) - OS << " [piece offset=" << Expr.getPieceOffset() - << " size=" << Expr.getPieceSize() << "]"; + const DIExpression *Expr = MI->getDebugExpression(); + if (Expr->isBitPiece()) + OS << " [bit_piece offset=" << Expr->getBitPieceOffset() + << " size=" << Expr->getBitPieceSize() << "]"; OS << " <- "; // The second operand is only an offset if it's an immediate. 
@@ -691,8 +712,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { Reg = MI->getOperand(0).getReg(); } else { assert(MI->getOperand(0).isFI() && "Unknown operand type"); - const TargetFrameLowering *TFI = - AP.TM.getSubtargetImpl()->getFrameLowering(); + const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering(); Offset += TFI->getFrameIndexReference(*AP.MF, MI->getOperand(0).getIndex(), Reg); Deref = true; @@ -701,19 +721,19 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // Suppress offset, it is not meaningful here. OS << "undef"; // NOTE: Want this comment at start of line, don't emit with AddComment. - AP.OutStreamer.emitRawComment(OS.str()); + AP.OutStreamer->emitRawComment(OS.str()); return true; } if (Deref) OS << '['; - OS << AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Reg); + OS << AP.MMI->getContext().getRegisterInfo()->getName(Reg); } if (Deref) OS << '+' << Offset << ']'; // NOTE: Want this comment at start of line, don't emit with AddComment. - AP.OutStreamer.emitRawComment(OS.str()); + AP.OutStreamer->emitRawComment(OS.str()); return true; } @@ -754,13 +774,15 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) { int FrameOffset = MI.getOperand(1).getImm(); // Emit a symbol assignment. - OutStreamer.EmitAssignment(FrameAllocSym, + OutStreamer->EmitAssignment(FrameAllocSym, MCConstantExpr::Create(FrameOffset, OutContext)); } /// EmitFunctionBody - This method emits the body and trailer for a /// function. void AsmPrinter::EmitFunctionBody() { + EmitFunctionHeader(); + // Emit target-specific gunk before the function body. EmitFunctionBodyStart(); @@ -789,7 +811,7 @@ void AsmPrinter::EmitFunctionBody() { } if (isVerbose()) - emitComments(MI, OutStreamer.GetCommentOS()); + emitComments(MI, OutStreamer->GetCommentOS()); switch (MI.getOpcode()) { case TargetOpcode::CFI_INSTRUCTION: @@ -802,7 +824,7 @@ void AsmPrinter::EmitFunctionBody() { case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: - OutStreamer.EmitLabel(MI.getOperand(0).getMCSymbol()); + OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol()); break; case TargetOpcode::INLINEASM: EmitInlineAsm(&MI); @@ -841,13 +863,13 @@ void AsmPrinter::EmitFunctionBody() { // labels from collapsing together. Just emit a noop. if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)) { MCInst Noop; - TM.getSubtargetImpl()->getInstrInfo()->getNoopForMachoTarget(Noop); - OutStreamer.AddComment("avoids zero-length function"); + MF->getSubtarget().getInstrInfo()->getNoopForMachoTarget(Noop); + OutStreamer->AddComment("avoids zero-length function"); // Targets can opt-out of emitting the noop here by leaving the opcode // unspecified. if (Noop.getOpcode()) - OutStreamer.EmitInstruction(Noop, getSubtargetInfo()); + OutStreamer->EmitInstruction(Noop, getSubtargetInfo()); } const Function *F = MF->getFunction(); @@ -857,29 +879,41 @@ void AsmPrinter::EmitFunctionBody() { MCSymbol *Sym = GetBlockAddressSymbol(&BB); if (Sym->isDefined()) continue; - OutStreamer.AddComment("Address of block that was removed by CodeGen"); - OutStreamer.EmitLabel(Sym); + OutStreamer->AddComment("Address of block that was removed by CodeGen"); + OutStreamer->EmitLabel(Sym); } // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); + if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || + MAI->hasDotTypeDotSizeDirective()) { + // Create a symbol for the end of function. 
+ CurrentFnEnd = createTempSymbol("func_end"); + OutStreamer->EmitLabel(CurrentFnEnd); + } + // If the target wants a .size directive for the size of the function, emit // it. if (MAI->hasDotTypeDotSizeDirective()) { - // Create a symbol for the end of function, so we can get the size as - // difference between the function label and the temp label. - MCSymbol *FnEndLabel = OutContext.CreateTempSymbol(); - OutStreamer.EmitLabel(FnEndLabel); - + // We can get the size as difference between the function label and the + // temp label. const MCExpr *SizeExp = - MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext), + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(CurrentFnEnd, OutContext), MCSymbolRefExpr::Create(CurrentFnSymForSize, OutContext), OutContext); - OutStreamer.EmitELFSize(CurrentFnSym, SizeExp); + OutStreamer->EmitELFSize(CurrentFnSym, SizeExp); } + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + HI.Handler->markFunctionEnd(); + } + + // Print out jump tables referenced by the function. + EmitJumpTableInfo(); + // Emit post-function debug and/or EH information. for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); @@ -887,17 +921,106 @@ void AsmPrinter::EmitFunctionBody() { } MMI->EndFunction(); - // Print out jump tables referenced by the function. - EmitJumpTableInfo(); + OutStreamer->AddBlankLine(); +} + +/// \brief Compute the number of Global Variables that uses a Constant. +static unsigned getNumGlobalVariableUses(const Constant *C) { + if (!C) + return 0; + + if (isa<GlobalVariable>(C)) + return 1; + + unsigned NumUses = 0; + for (auto *CU : C->users()) + NumUses += getNumGlobalVariableUses(dyn_cast<Constant>(CU)); - OutStreamer.AddBlankLine(); + return NumUses; +} + +/// \brief Only consider global GOT equivalents if at least one user is a +/// cstexpr inside an initializer of another global variables. Also, don't +/// handle cstexpr inside instructions. During global variable emission, +/// candidates are skipped and are emitted later in case at least one cstexpr +/// isn't replaced by a PC relative GOT entry access. +static bool isGOTEquivalentCandidate(const GlobalVariable *GV, + unsigned &NumGOTEquivUsers) { + // Global GOT equivalents are unnamed private globals with a constant + // pointer initializer to another global symbol. They must point to a + // GlobalVariable or Function, i.e., as GlobalValue. + if (!GV->hasUnnamedAddr() || !GV->hasInitializer() || !GV->isConstant() || + !GV->isDiscardableIfUnused() || !dyn_cast<GlobalValue>(GV->getOperand(0))) + return false; + + // To be a got equivalent, at least one of its users need to be a constant + // expression used by another global variable. + for (auto *U : GV->users()) + NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U)); + + return NumGOTEquivUsers > 0; +} + +/// \brief Unnamed constant global variables solely contaning a pointer to +/// another globals variable is equivalent to a GOT table entry; it contains the +/// the address of another symbol. Optimize it and replace accesses to these +/// "GOT equivalents" by using the GOT entry for the final global instead. +/// Compute GOT equivalent candidates among all global variables to avoid +/// emitting them if possible later on, after it use is replaced by a GOT entry +/// access. 
+void AsmPrinter::computeGlobalGOTEquivs(Module &M) { + if (!getObjFileLowering().supportIndirectSymViaGOTPCRel()) + return; + + for (const auto &G : M.globals()) { + unsigned NumGOTEquivUsers = 0; + if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers)) + continue; + + const MCSymbol *GOTEquivSym = getSymbol(&G); + GlobalGOTEquivs[GOTEquivSym] = std::make_pair(&G, NumGOTEquivUsers); + } +} + +/// \brief Constant expressions using GOT equivalent globals may not be eligible +/// for PC relative GOT entry conversion, in such cases we need to emit such +/// globals we previously omitted in EmitGlobalVariable. +void AsmPrinter::emitGlobalGOTEquivs() { + if (!getObjFileLowering().supportIndirectSymViaGOTPCRel()) + return; + + SmallVector<const GlobalVariable *, 8> FailedCandidates; + for (auto &I : GlobalGOTEquivs) { + const GlobalVariable *GV = I.second.first; + unsigned Cnt = I.second.second; + if (Cnt) + FailedCandidates.push_back(GV); + } + GlobalGOTEquivs.clear(); + + for (auto *GV : FailedCandidates) + EmitGlobalVariable(GV); } bool AsmPrinter::doFinalization(Module &M) { + // Set the MachineFunction to nullptr so that we can catch attempted + // accesses to MF specific features at the module level and so that + // we can conditionalize accesses based on whether or not it is nullptr. + MF = nullptr; + + // Gather all GOT equivalent globals in the module. We really need two + // passes over the globals: one to compute and another to avoid its emission + // in EmitGlobalVariable, otherwise we would not be able to handle cases + // where the got equivalent shows up before its use. + computeGlobalGOTEquivs(M); + // Emit global variables. for (const auto &G : M.globals()) EmitGlobalVariable(&G); + // Emit remaining GOT equivalent globals. + emitGlobalGOTEquivs(); + // Emit visibility info for declarations for (const Function &F : M) { if (!F.isDeclaration()) @@ -910,63 +1033,34 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, V, false); } - // Get information about jump-instruction tables to print. - JumpInstrTableInfo *JITI = getAnalysisIfAvailable<JumpInstrTableInfo>(); - - if (JITI && !JITI->getTables().empty()) { - unsigned Arch = Triple(getTargetTriple()).getArch(); - bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb); - MCInst TrapInst; - TM.getSubtargetImpl()->getInstrInfo()->getTrap(TrapInst); - unsigned LogAlignment = llvm::Log2_64(JITI->entryByteAlignment()); - - // Emit the right section for these functions. - OutStreamer.SwitchSection(OutContext.getObjectFileInfo()->getTextSection()); - for (const auto &KV : JITI->getTables()) { - uint64_t Count = 0; - for (const auto &FunPair : KV.second) { - // Emit the function labels to make this be a function entry point. - MCSymbol *FunSym = - OutContext.GetOrCreateSymbol(FunPair.second->getName()); - EmitAlignment(LogAlignment); - if (IsThumb) - OutStreamer.EmitThumbFunc(FunSym); - if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer.EmitSymbolAttribute(FunSym, MCSA_ELF_TypeFunction); - OutStreamer.EmitLabel(FunSym); - - // Emit the jump instruction to transfer control to the original - // function. 
- MCInst JumpToFun; - MCSymbol *TargetSymbol = - OutContext.GetOrCreateSymbol(FunPair.first->getName()); - const MCSymbolRefExpr *TargetSymRef = - MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT, - OutContext); - TM.getSubtargetImpl()->getInstrInfo()->getUnconditionalBranch( - JumpToFun, TargetSymRef); - OutStreamer.EmitInstruction(JumpToFun, getSubtargetInfo()); - ++Count; - } - - // Emit enough padding instructions to fill up to the next power of two. - uint64_t Remaining = NextPowerOf2(Count) - Count; - for (uint64_t C = 0; C < Remaining; ++C) { - EmitAlignment(LogAlignment); - OutStreamer.EmitInstruction(TrapInst, getSubtargetInfo()); - } - - } - } + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); // Emit module flags. SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; M.getModuleFlagsMetadata(ModuleFlags); if (!ModuleFlags.empty()) - getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, *Mang, TM); + TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, *Mang, TM); + + Triple TT(TM.getTargetTriple()); + if (TT.isOSBinFormatELF()) { + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer->SwitchSection(TLOF.getDataRelSection()); + const DataLayout *DL = TM.getDataLayout(); + + for (const auto &Stub : Stubs) { + OutStreamer->EmitLabel(Stub.first); + OutStreamer->EmitSymbolValue(Stub.second.getPointer(), + DL->getPointerSize()); + } + } + } // Make sure we wrote out everything we need. - OutStreamer.Flush(); + OutStreamer->Flush(); // Finalize debug and EH information. for (const HandlerInfo &HI : Handlers) { @@ -989,31 +1083,31 @@ bool AsmPrinter::doFinalization(Module &M) { for (const auto &G : M.globals()) { if (!G.hasExternalWeakLinkage()) continue; - OutStreamer.EmitSymbolAttribute(getSymbol(&G), MCSA_WeakReference); + OutStreamer->EmitSymbolAttribute(getSymbol(&G), MCSA_WeakReference); } for (const auto &F : M) { if (!F.hasExternalWeakLinkage()) continue; - OutStreamer.EmitSymbolAttribute(getSymbol(&F), MCSA_WeakReference); + OutStreamer->EmitSymbolAttribute(getSymbol(&F), MCSA_WeakReference); } } - OutStreamer.AddBlankLine(); + OutStreamer->AddBlankLine(); for (const auto &Alias : M.aliases()) { MCSymbol *Name = getSymbol(&Alias); if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective()) - OutStreamer.EmitSymbolAttribute(Name, MCSA_Global); + OutStreamer->EmitSymbolAttribute(Name, MCSA_Global); else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage()) - OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference); + OutStreamer->EmitSymbolAttribute(Name, MCSA_WeakReference); else assert(Alias.hasLocalLinkage() && "Invalid alias linkage"); EmitVisibility(Name, Alias.getVisibility()); // Emit the directives as assignments aka .set: - OutStreamer.EmitAssignment(Name, lowerConstant(Alias.getAliasee())); + OutStreamer->EmitAssignment(Name, lowerConstant(Alias.getAliasee())); } GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); @@ -1027,27 +1121,26 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit __morestack address if needed for indirect calls. 
if (MMI->usesMorestackAddr()) { - const MCSection *ReadOnlySection = + MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(), /*C=*/nullptr); - OutStreamer.SwitchSection(ReadOnlySection); + OutStreamer->SwitchSection(ReadOnlySection); MCSymbol *AddrSymbol = - OutContext.GetOrCreateSymbol(StringRef("__morestack_addr")); - OutStreamer.EmitLabel(AddrSymbol); + OutContext.getOrCreateSymbol(StringRef("__morestack_addr")); + OutStreamer->EmitLabel(AddrSymbol); - const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout(); - unsigned PtrSize = DL.getPointerSize(0); - OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("__morestack"), - PtrSize); + unsigned PtrSize = TM.getDataLayout()->getPointerSize(0); + OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"), + PtrSize); } // If we don't have any trampolines, then we don't require stack memory // to be executable. Some targets have a directive to declare this. Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) - if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext)) - OutStreamer.SwitchSection(S); + if (MCSection *S = MAI->getNonexecutableStackSection(OutContext)) + OutStreamer->SwitchSection(S); // Allow the target to emit any magic that it wants at the end of the file, // after everything else has gone out. @@ -1056,29 +1149,44 @@ bool AsmPrinter::doFinalization(Module &M) { delete Mang; Mang = nullptr; MMI = nullptr; - OutStreamer.Finish(); - OutStreamer.reset(); + OutStreamer->Finish(); + OutStreamer->reset(); return false; } +MCSymbol *AsmPrinter::getCurExceptionSym() { + if (!CurExceptionSym) + CurExceptionSym = createTempSymbol("exception"); + return CurExceptionSym; +} + void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; // Get the function symbol. CurrentFnSym = getSymbol(MF.getFunction()); CurrentFnSymForSize = CurrentFnSym; + CurrentFnBegin = nullptr; + CurExceptionSym = nullptr; + bool NeedsLocalForSize = MAI->needsLocalForSize(); + if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || + NeedsLocalForSize) { + CurrentFnBegin = createTempSymbol("func_begin"); + if (NeedsLocalForSize) + CurrentFnSymForSize = CurrentFnBegin; + } if (isVerbose()) LI = &getAnalysis<MachineLoopInfo>(); } namespace { - // SectionCPs - Keep track the alignment, constpool entries per Section. +// Keep track the alignment, constpool entries per Section. struct SectionCPs { - const MCSection *S; + MCSection *S; unsigned Alignment; SmallVector<unsigned, 4> CPEs; - SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {} + SectionCPs(MCSection *s, unsigned a) : S(s), Alignment(a) {} }; } @@ -1100,13 +1208,13 @@ void AsmPrinter::EmitConstantPool() { unsigned Align = CPE.getAlignment(); SectionKind Kind = - CPE.getSectionKind(TM.getSubtargetImpl()->getDataLayout()); + CPE.getSectionKind(TM.getDataLayout()); const Constant *C = nullptr; if (!CPE.isMachineConstantPoolEntry()) C = CPE.Val.ConstVal; - const MCSection *S = getObjFileLowering().getSectionForConstant(Kind, C); + MCSection *S = getObjFileLowering().getSectionForConstant(Kind, C); // The number of sections are small, just do a linear search from the // last section to the first. 
@@ -1139,7 +1247,7 @@ void AsmPrinter::EmitConstantPool() { continue; if (CurSection != CPSections[i].S) { - OutStreamer.SwitchSection(CPSections[i].S); + OutStreamer->SwitchSection(CPSections[i].S); EmitAlignment(Log2_32(CPSections[i].Alignment)); CurSection = CPSections[i].S; Offset = 0; @@ -1150,13 +1258,13 @@ void AsmPrinter::EmitConstantPool() { // Emit inter-object padding for alignment. unsigned AlignMask = CPE.getAlignment() - 1; unsigned NewOffset = (Offset + AlignMask) & ~AlignMask; - OutStreamer.EmitZeros(NewOffset - Offset); + OutStreamer->EmitZeros(NewOffset - Offset); Type *Ty = CPE.getType(); Offset = NewOffset + - TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty); + TM.getDataLayout()->getTypeAllocSize(Ty); - OutStreamer.EmitLabel(Sym); + OutStreamer->EmitLabel(Sym); if (CPE.isMachineConstantPoolEntry()) EmitMachineConstantPoolValue(CPE.Val.MachineCPVal); else @@ -1169,7 +1277,7 @@ void AsmPrinter::EmitConstantPool() { /// by the current function to the current output stream. /// void AsmPrinter::EmitJumpTableInfo() { - const DataLayout *DL = MF->getSubtarget().getDataLayout(); + const DataLayout *DL = MF->getTarget().getDataLayout(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (!MJTI) return; if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return; @@ -1179,34 +1287,23 @@ void AsmPrinter::EmitJumpTableInfo() { // Pick the directive to use to print the jump table entries, and switch to // the appropriate section. const Function *F = MF->getFunction(); - bool JTInDiffSection = false; - if (// In PIC mode, we need to emit the jump table to the same section as the - // function body itself, otherwise the label differences won't make sense. - // FIXME: Need a better predicate for this: what about custom entries? - MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || - // We should also do if the section name is NULL or function is declared - // in discardable section - // FIXME: this isn't the right predicate, should be based on the MCSection - // for the function. - F->isWeakForLinker()) { - OutStreamer.SwitchSection( - getObjFileLowering().SectionForGlobal(F, *Mang, TM)); - } else { - // Otherwise, drop it in the readonly section. - const MCSection *ReadOnlySection = - getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(), - /*C=*/nullptr); - OutStreamer.SwitchSection(ReadOnlySection); - JTInDiffSection = true; + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection( + MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32, + *F); + if (JTInDiffSection) { + // Drop it in the readonly section. + MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, *Mang, TM); + OutStreamer->SwitchSection(ReadOnlySection); } EmitAlignment(Log2_32( - MJTI->getEntryAlignment(*TM.getSubtargetImpl()->getDataLayout()))); + MJTI->getEntryAlignment(*TM.getDataLayout()))); // Jump tables in code sections are marked with a data_region directive // where that's supported. 
if (!JTInDiffSection) - OutStreamer.EmitDataRegion(MCDR_DataRegionJT32); + OutStreamer->EmitDataRegion(MCDR_DataRegionJT32); for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; @@ -1219,7 +1316,7 @@ void AsmPrinter::EmitJumpTableInfo() { if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 && MAI->doesSetDirectiveSuppressesReloc()) { SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets; - const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext); for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { const MachineBasicBlock *MBB = JTBBs[ii]; @@ -1229,8 +1326,9 @@ void AsmPrinter::EmitJumpTableInfo() { // .set LJTSet, LBB32-base const MCExpr *LHS = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); - OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()), - MCBinaryExpr::CreateSub(LHS, Base, OutContext)); + OutStreamer->EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()), + MCBinaryExpr::CreateSub(LHS, Base, + OutContext)); } } @@ -1241,15 +1339,15 @@ void AsmPrinter::EmitJumpTableInfo() { if (JTInDiffSection && DL->hasLinkerPrivateGlobalPrefix()) // FIXME: This doesn't have to have any specific name, just any randomly // named and numbered 'l' label would work. Simplify GetJTISymbol. - OutStreamer.EmitLabel(GetJTISymbol(JTI, true)); + OutStreamer->EmitLabel(GetJTISymbol(JTI, true)); - OutStreamer.EmitLabel(GetJTISymbol(JTI)); + OutStreamer->EmitLabel(GetJTISymbol(JTI)); for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) EmitJumpTableEntry(MJTI, JTBBs[ii], JTI); } if (!JTInDiffSection) - OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); + OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); } /// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the @@ -1263,9 +1361,8 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, case MachineJumpTableInfo::EK_Inline: llvm_unreachable("Cannot emit EK_Inline jump table entry"); case MachineJumpTableInfo::EK_Custom32: - Value = - TM.getSubtargetImpl()->getTargetLowering()->LowerCustomJumpTableEntry( - MJTI, MBB, UID, OutContext); + Value = MF->getSubtarget().getTargetLowering()->LowerCustomJumpTableEntry( + MJTI, MBB, UID, OutContext); break; case MachineJumpTableInfo::EK_BlockAddress: // EK_BlockAddress - Each entry is a plain address of block, e.g.: @@ -1277,7 +1374,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, // with a relocation as gp-relative, e.g.: // .gprel32 LBB123 MCSymbol *MBBSym = MBB->getSymbol(); - OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); + OutStreamer->EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); return; } @@ -1286,7 +1383,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, // with a relocation as gp-relative, e.g.: // .gpdword LBB123 MCSymbol *MBBSym = MBB->getSymbol(); - OutStreamer.EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); + OutStreamer->EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); return; } @@ -1304,7 +1401,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, break; } Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); - const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); const 
MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, UID, OutContext); Value = MCBinaryExpr::CreateSub(Value, Base, OutContext); break; @@ -1314,8 +1411,8 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, assert(Value && "Unknown entry kind!"); unsigned EntrySize = - MJTI->getEntrySize(*TM.getSubtargetImpl()->getDataLayout()); - OutStreamer.EmitValue(Value, EntrySize); + MJTI->getEntrySize(*TM.getDataLayout()); + OutStreamer->EmitValue(Value, EntrySize); } @@ -1344,8 +1441,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { StringRef Sym(".constructors_used"); - OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym), - MCSA_Reference); + OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym), + MCSA_Reference); } return true; } @@ -1356,8 +1453,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { StringRef Sym(".destructors_used"); - OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym), - MCSA_Reference); + OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym), + MCSA_Reference); } return true; } @@ -1374,7 +1471,7 @@ void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) { const GlobalValue *GV = dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); if (GV) - OutStreamer.EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip); + OutStreamer->EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip); } } @@ -1424,7 +1521,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { } // Emit the function pointers in the target-specific order - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = TM.getDataLayout(); unsigned Align = Log2_32(DL->getPointerPrefAlignment()); std::stable_sort(Structors.begin(), Structors.end(), [](const Structor &L, @@ -1440,11 +1537,11 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { KeySym = getSymbol(GV); } - const MCSection *OutputSection = + MCSection *OutputSection = (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym) : Obj.getStaticDtorSection(S.Priority, KeySym)); - OutStreamer.SwitchSection(OutputSection); - if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection()) + OutStreamer->SwitchSection(OutputSection); + if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection()) EmitAlignment(Align); EmitXXStructor(S.Func); } @@ -1460,7 +1557,7 @@ void AsmPrinter::EmitModuleIdents(Module &M) { assert(N->getNumOperands() == 1 && "llvm.ident metadata entry can have only one operand"); const MDString *S = cast<MDString>(N->getOperand(0)); - OutStreamer.EmitIdent(S->getString()); + OutStreamer->EmitIdent(S->getString()); } } } @@ -1472,19 +1569,19 @@ void AsmPrinter::EmitModuleIdents(Module &M) { /// EmitInt8 - Emit a byte directive and value. /// void AsmPrinter::EmitInt8(int Value) const { - OutStreamer.EmitIntValue(Value, 1); + OutStreamer->EmitIntValue(Value, 1); } /// EmitInt16 - Emit a short directive and value. /// void AsmPrinter::EmitInt16(int Value) const { - OutStreamer.EmitIntValue(Value, 2); + OutStreamer->EmitIntValue(Value, 2); } /// EmitInt32 - Emit a long directive and value. 
/// void AsmPrinter::EmitInt32(int Value) const { - OutStreamer.EmitIntValue(Value, 4); + OutStreamer->EmitIntValue(Value, 4); } /// Emit something like ".long Hi-Lo" where the size in bytes of the directive @@ -1492,6 +1589,10 @@ void AsmPrinter::EmitInt32(int Value) const { /// .set if it avoids relocations. void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size) const { + if (!MAI->doesDwarfUseRelocationsAcrossSections()) + if (OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size)) + return; + // Get the Hi-Lo expression. const MCExpr *Diff = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext), @@ -1499,14 +1600,14 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, OutContext); if (!MAI->doesSetDirectiveSuppressesReloc()) { - OutStreamer.EmitValue(Diff, Size); + OutStreamer->EmitValue(Diff, Size); return; } // Otherwise, emit with .set (aka assignment). - MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++); - OutStreamer.EmitAssignment(SetLabel, Diff); - OutStreamer.EmitSymbolValue(SetLabel, Size); + MCSymbol *SetLabel = createTempSymbol("set"); + OutStreamer->EmitAssignment(SetLabel, Diff); + OutStreamer->EmitSymbolValue(SetLabel, Size); } /// EmitLabelPlusOffset - Emit something like ".long Label+Offset" @@ -1516,7 +1617,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, unsigned Size, bool IsSectionRelative) const { if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { - OutStreamer.EmitCOFFSecRel32(Label); + OutStreamer->EmitCOFFSecRel32(Label); return; } @@ -1526,7 +1627,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, Expr = MCBinaryExpr::CreateAdd( Expr, MCConstantExpr::Create(Offset, OutContext), OutContext); - OutStreamer.EmitValue(Expr, Size); + OutStreamer->EmitValue(Expr, Size); } //===----------------------------------------------------------------------===// @@ -1539,7 +1640,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { if (GV) - NumBits = getGVAlignmentLog2(GV, *TM.getSubtargetImpl()->getDataLayout(), + NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits); if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. @@ -1548,9 +1649,9 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && "undefined behavior"); if (getCurrentSection()->getKind().isText()) - OutStreamer.EmitCodeAlignment(1u << NumBits); + OutStreamer->EmitCodeAlignment(1u << NumBits); else - OutStreamer.EmitValueToAlignment(1u << NumBits); + OutStreamer->EmitValueToAlignment(1u << NumBits); } //===----------------------------------------------------------------------===// @@ -1586,8 +1687,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. 
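Before the fold below is rewired to take the TargetMachine's DataLayout by reference, the fallback is worth seeing in isolation. A minimal sketch, assuming only a DataLayout DL that matches the target; ConstantFoldConstantExpression is declared in llvm/Analysis/ConstantFolding.h, and the wrapper name foldIfPossible is ours:

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"

using namespace llvm;

// Collapse a ConstantExpr the optimizer left unfolded; the caller re-runs
// lowerConstant() on the simpler result.
static const Constant *foldIfPossible(const Constant *CV, const DataLayout &DL) {
  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV))
    if (Constant *Folded = ConstantFoldConstantExpression(CE, DL))
      if (Folded != CE)
        return Folded;
  return CV;
}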
- if (Constant *C = ConstantFoldConstantExpression( - CE, TM.getSubtargetImpl()->getDataLayout())) + if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout())) if (C != CE) return lowerConstant(C); @@ -1601,7 +1701,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { - const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *TM.getDataLayout(); + // Generate a symbolic expression for the byte address APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI); @@ -1625,7 +1726,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { return lowerConstant(CE->getOperand(0)); case Instruction::IntToPtr: { - const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *TM.getDataLayout(); + // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); @@ -1635,7 +1737,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { } case Instruction::PtrToInt: { - const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *TM.getDataLayout(); + // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. Constant *Op = CE->getOperand(0); @@ -1685,7 +1788,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { } } -static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP); +static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP, + const Constant *BaseCV = nullptr, + uint64_t Offset = 0); /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the @@ -1709,7 +1814,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { if (CI->getBitWidth() > 64) return -1; uint64_t Size = - TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(V->getType()); + TM.getDataLayout()->getTypeAllocSize(V->getType()); uint64_t Value = CI->getZExtValue(); // Make sure the constant is at least 8 bits long and has a power @@ -1754,26 +1859,26 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, int Value = isRepeatedByteSequence(CDS, AP.TM); if (Value != -1) { uint64_t Bytes = - AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize( + AP.TM.getDataLayout()->getTypeAllocSize( CDS->getType()); // Don't emit a 1-byte object as a .fill. if (Bytes > 1) - return AP.OutStreamer.EmitFill(Bytes, Value); + return AP.OutStreamer->EmitFill(Bytes, Value); } // If this can be emitted with .ascii/.asciz, emit it as such. if (CDS->isString()) - return AP.OutStreamer.EmitBytes(CDS->getAsString()); + return AP.OutStreamer->EmitBytes(CDS->getAsString()); // Otherwise, emit the values in successive locations. 
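For example, a ConstantDataVector of four floats reaches the ElementByteSize == 4 branch below: each element is emitted as a 4-byte integer carrying its IEEE-754 bit pattern (annotated "float <value>" in verbose mode) rather than as a textual .float directive, so no precision is lost on the way through the assembler.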
unsigned ElementByteSize = CDS->getElementByteSize(); if (isa<IntegerType>(CDS->getElementType())) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { if (AP.isVerbose()) - AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", - CDS->getElementAsInteger(i)); - AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i), - ElementByteSize); + AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", + CDS->getElementAsInteger(i)); + AP.OutStreamer->EmitIntValue(CDS->getElementAsInteger(i), + ElementByteSize); } } else if (ElementByteSize == 4) { // FP Constants are printed as integer constants to avoid losing @@ -1787,8 +1892,8 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, F = CDS->getElementAsFloat(i); if (AP.isVerbose()) - AP.OutStreamer.GetCommentOS() << "float " << F << '\n'; - AP.OutStreamer.EmitIntValue(I, 4); + AP.OutStreamer->GetCommentOS() << "float " << F << '\n'; + AP.OutStreamer->EmitIntValue(I, 4); } } else { assert(CDS->getElementType()->isDoubleTy()); @@ -1800,34 +1905,36 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, F = CDS->getElementAsDouble(i); if (AP.isVerbose()) - AP.OutStreamer.GetCommentOS() << "double " << F << '\n'; - AP.OutStreamer.EmitIntValue(I, 8); + AP.OutStreamer->GetCommentOS() << "double " << F << '\n'; + AP.OutStreamer->EmitIntValue(I, 8); } } - const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); unsigned Size = DL.getTypeAllocSize(CDS->getType()); unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) * CDS->getNumElements(); if (unsigned Padding = Size - EmittedSize) - AP.OutStreamer.EmitZeros(Padding); + AP.OutStreamer->EmitZeros(Padding); } -static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) { +static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { // See if we can aggregate some values. Make sure it can be // represented as a series of bytes of the constant value. 
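To make the aggregate shortcut concrete: for a [16 x i8] whose elements are all 42, isRepeatedByteSequence returns 0x2A and the call below collapses the whole array into one EmitFill of 16 bytes, whereas an i16 0x1234 yields -1 (its two bytes differ) and forces element-by-element emission. The integer case shown earlier requires at least 8 bits and a power-of-two width precisely so the value can be sliced into identical bytes.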
int Value = isRepeatedByteSequence(CA, AP.TM); + const DataLayout &DL = *AP.TM.getDataLayout(); if (Value != -1) { - uint64_t Bytes = - AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize( - CA->getType()); - AP.OutStreamer.EmitFill(Bytes, Value); + uint64_t Bytes = DL.getTypeAllocSize(CA->getType()); + AP.OutStreamer->EmitFill(Bytes, Value); } else { - for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - emitGlobalConstantImpl(CA->getOperand(i), AP); + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { + emitGlobalConstantImpl(CA->getOperand(i), AP, BaseCV, Offset); + Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType()); + } } } @@ -1835,36 +1942,37 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) emitGlobalConstantImpl(CV->getOperand(i), AP); - const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); unsigned Size = DL.getTypeAllocSize(CV->getType()); unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) * CV->getType()->getNumElements(); if (unsigned Padding = Size - EmittedSize) - AP.OutStreamer.EmitZeros(Padding); + AP.OutStreamer->EmitZeros(Padding); } -static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) { +static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { // Print the fields in successive locations. Pad to align if needed! - const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = AP.TM.getDataLayout(); unsigned Size = DL->getTypeAllocSize(CS->getType()); const StructLayout *Layout = DL->getStructLayout(CS->getType()); uint64_t SizeSoFar = 0; for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { const Constant *Field = CS->getOperand(i); + // Print the actual field value. + emitGlobalConstantImpl(Field, AP, BaseCV, Offset+SizeSoFar); + // Check if padding is needed and insert one or more 0s. uint64_t FieldSize = DL->getTypeAllocSize(Field->getType()); uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) - Layout->getElementOffset(i)) - FieldSize; SizeSoFar += FieldSize + PadSize; - // Now print the actual field value. - emitGlobalConstantImpl(Field, AP); - // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well // as padding to ensure that the next field starts at the right offset. 
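Worked through for struct { i8, i32 } under a layout that places the i32 at offset 4: the first iteration prints the i8, computes PadSize = (4 - 0) - 1 = 3, and the EmitZeros call below writes those three pad bytes so the i32 lands on its ABI offset; SizeSoFar advances by FieldSize + PadSize = 4, which is what keeps the closing assertion against Layout->getSizeInBytes() satisfied.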
- AP.OutStreamer.EmitZeros(PadSize); + AP.OutStreamer->EmitZeros(PadSize); } assert(SizeSoFar == Layout->getSizeInBytes() && "Layout of constant struct may be incorrect!"); @@ -1880,10 +1988,10 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { CFP->getValueAPF().toString(StrVal); if (CFP->getType()) - CFP->getType()->print(AP.OutStreamer.GetCommentOS()); + CFP->getType()->print(AP.OutStreamer->GetCommentOS()); else - AP.OutStreamer.GetCommentOS() << "Printing <null> Type"; - AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n'; + AP.OutStreamer->GetCommentOS() << "Printing <null> Type"; + AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n'; } // Now iterate through the APInt chunks, emitting them in endian-correct @@ -1895,32 +2003,32 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { // PPC's long double has odd notions of endianness compared to how LLVM // handles it: p[0] goes first for *big* endian on PPC. - if (AP.TM.getSubtargetImpl()->getDataLayout()->isBigEndian() && + if (AP.TM.getDataLayout()->isBigEndian() && !CFP->getType()->isPPC_FP128Ty()) { int Chunk = API.getNumWords() - 1; if (TrailingBytes) - AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes); + AP.OutStreamer->EmitIntValue(p[Chunk--], TrailingBytes); for (; Chunk >= 0; --Chunk) - AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t)); + AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t)); } else { unsigned Chunk; for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk) - AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t)); + AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t)); if (TrailingBytes) - AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes); + AP.OutStreamer->EmitIntValue(p[Chunk], TrailingBytes); } // Emit the tail padding for the long double. - const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); - AP.OutStreamer.EmitZeros(DL.getTypeAllocSize(CFP->getType()) - - DL.getTypeStoreSize(CFP->getType())); + const DataLayout &DL = *AP.TM.getDataLayout(); + AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(CFP->getType()) - + DL.getTypeStoreSize(CFP->getType())); } static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { - const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = AP.TM.getDataLayout(); unsigned BitWidth = CI->getBitWidth(); // Copy the value as we may massage the layout for constants whose bit width @@ -1959,28 +2067,123 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { const uint64_t *RawData = Realigned.getRawData(); for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { uint64_t Val = DL->isBigEndian() ? RawData[e - i - 1] : RawData[i]; - AP.OutStreamer.EmitIntValue(Val, 8); + AP.OutStreamer->EmitIntValue(Val, 8); } if (ExtraBitsSize) { // Emit the extra bits after the 64-bits chunks. // Emit a directive that fills the expected size. 
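As a worked case, take an i96 constant under a layout that rounds its alloc size up to 16 bytes: the word loop above emits one full 64-bit chunk, ExtraBitsSize is 96 % 64 = 32, and the directive sized below comes out at 16 - 8 = 8 bytes, so the remaining 32 significant bits plus four bytes of tail padding leave in a single EmitIntValue.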
- uint64_t Size = AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize( + uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize( CI->getType()); Size -= (BitWidth / 64) * 8; assert(Size && Size * 8 >= ExtraBitsSize && (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) == ExtraBits && "Directive too small for extra bits."); - AP.OutStreamer.EmitIntValue(ExtraBits, Size); + AP.OutStreamer->EmitIntValue(ExtraBits, Size); } } -static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { - const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout(); +/// \brief Transform a not absolute MCExpr containing a reference to a GOT +/// equivalent global, by a target specific GOT pc relative access to the +/// final symbol. +static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, + const Constant *BaseCst, + uint64_t Offset) { + // The global @foo below illustrates a global that uses a got equivalent. + // + // @bar = global i32 42 + // @gotequiv = private unnamed_addr constant i32* @bar + // @foo = i32 trunc (i64 sub (i64 ptrtoint (i32** @gotequiv to i64), + // i64 ptrtoint (i32* @foo to i64)) + // to i32) + // + // The cstexpr in @foo is converted into the MCExpr `ME`, where we actually + // check whether @foo is suitable to use a GOTPCREL. `ME` is usually in the + // form: + // + // foo = cstexpr, where + // cstexpr := <gotequiv> - "." + <cst> + // cstexpr := <gotequiv> - (<foo> - <offset from @foo base>) + <cst> + // + // After canonicalization by EvaluateAsRelocatable `ME` turns into: + // + // cstexpr := <gotequiv> - <foo> + gotpcrelcst, where + // gotpcrelcst := <offset from @foo base> + <cst> + // + MCValue MV; + if (!(*ME)->EvaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute()) + return; + + const MCSymbol *GOTEquivSym = &MV.getSymA()->getSymbol(); + if (!AP.GlobalGOTEquivs.count(GOTEquivSym)) + return; + + const GlobalValue *BaseGV = dyn_cast<GlobalValue>(BaseCst); + if (!BaseGV) + return; + + const MCSymbol *BaseSym = AP.getSymbol(BaseGV); + if (BaseSym != &MV.getSymB()->getSymbol()) + return; + + // Make sure to match: + // + // gotpcrelcst := <offset from @foo base> + <cst> + // + // If gotpcrelcst is positive it means that we can safely fold the pc rel + // displacement into the GOTPCREL. We can also can have an extra offset <cst> + // if the target knows how to encode it. + // + int64_t GOTPCRelCst = Offset + MV.getConstant(); + if (GOTPCRelCst < 0) + return; + if (!AP.getObjFileLowering().supportGOTPCRelWithOffset() && GOTPCRelCst != 0) + return; + + // Emit the GOT PC relative to replace the got equivalent global, i.e.: + // + // bar: + // .long 42 + // gotequiv: + // .quad bar + // foo: + // .long gotequiv - "." 
+ <cst> + // + // is replaced by the target specific equivalent to: + // + // bar: + // .long 42 + // foo: + // .long bar@GOTPCREL+<gotpcrelcst> + // + AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym]; + const GlobalVariable *GV = Result.first; + int NumUses = (int)Result.second; + const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0)); + const MCSymbol *FinalSym = AP.getSymbol(FinalGV); + *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel( + FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer); + + // Update GOT equivalent usage information + --NumUses; + if (NumUses >= 0) + AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses); +} + +static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { + const DataLayout *DL = AP.TM.getDataLayout(); uint64_t Size = DL->getTypeAllocSize(CV->getType()); + + // Globals with sub-elements such as combinations of arrays and structs + // are handled recursively by emitGlobalConstantImpl. Keep track of the + // constant symbol base and the current position with BaseCV and Offset. + if (!BaseCV && CV->hasOneUse()) + BaseCV = dyn_cast<Constant>(CV->user_back()); + if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) - return AP.OutStreamer.EmitZeros(Size); + return AP.OutStreamer->EmitZeros(Size); if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { switch (Size) { @@ -1989,9 +2192,9 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { case 4: case 8: if (AP.isVerbose()) - AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", - CI->getZExtValue()); - AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size); + AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", + CI->getZExtValue()); + AP.OutStreamer->EmitIntValue(CI->getZExtValue(), Size); return; default: emitGlobalConstantLargeInt(CI, AP); @@ -2003,7 +2206,7 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { return emitGlobalConstantFP(CFP, AP); if (isa<ConstantPointerNull>(CV)) { - AP.OutStreamer.EmitIntValue(0, Size); + AP.OutStreamer->EmitIntValue(0, Size); return; } @@ -2011,10 +2214,10 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { return emitGlobalConstantDataSequential(CDS, AP); if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return emitGlobalConstantArray(CVA, AP); + return emitGlobalConstantArray(CVA, AP, BaseCV, Offset); if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return emitGlobalConstantStruct(CVS, AP); + return emitGlobalConstantStruct(CVS, AP, BaseCV, Offset); if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of @@ -2026,7 +2229,7 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { // If the constant expression's size is greater than 64-bits, then we have // to emit the value in chunks. Try to constant fold the value and emit it // that way. - Constant *New = ConstantFoldConstantExpression(CE, DL); + Constant *New = ConstantFoldConstantExpression(CE, *DL); if (New && New != CE) return emitGlobalConstantImpl(New, AP); } @@ -2037,19 +2240,27 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. 
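The GOTPCREL rewrite described above hinges on a single MC-layer canonicalization; a condensed sketch of just the detection step, assuming ME was produced by lowerConstant() (MCValue and EvaluateAsRelocatable come from llvm/MC/MCValue.h and llvm/MC/MCExpr.h; the helper name is ours):

#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCValue.h"

using namespace llvm;

// True when ME canonicalizes to the SymA - SymB + Cst shape the rewrite
// requires (SymA: the GOT-equivalent slot, SymB: the base symbol).
static bool looksLikeGOTEquivalentUse(const MCExpr *ME) {
  MCValue MV;
  if (!ME->EvaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute())
    return false;
  return MV.getSymA() && MV.getSymB();
}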
- AP.OutStreamer.EmitValue(AP.lowerConstant(CV), Size); + const MCExpr *ME = AP.lowerConstant(CV); + + // Since lowerConstant already folded and got rid of all IR pointer and + // integer casts, detect GOT equivalent accesses by looking into the MCExpr + // directly. + if (AP.getObjFileLowering().supportIndirectSymViaGOTPCRel()) + handleIndirectSymViaGOTPCRel(AP, &ME, BaseCV, Offset); + + AP.OutStreamer->EmitValue(ME, Size); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. void AsmPrinter::EmitGlobalConstant(const Constant *CV) { uint64_t Size = - TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType()); + TM.getDataLayout()->getTypeAllocSize(CV->getType()); if (Size) emitGlobalConstantImpl(CV, *this); else if (MAI->hasSubsectionsViaSymbols()) { // If the global has zero size, emit a single byte so that two labels don't // look like they are at the same location. - OutStreamer.EmitIntValue(0, 1); + OutStreamer->EmitIntValue(0, 1); } } @@ -2069,23 +2280,10 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const { // Symbol Lowering Routines. //===----------------------------------------------------------------------===// -/// GetTempSymbol - Return the MCSymbol corresponding to the assembler -/// temporary label with the specified stem and unique ID. -MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); - return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) + - Name + Twine(ID)); -} - -/// GetTempSymbol - Return an assembler temporary label with the specified -/// stem. -MCSymbol *AsmPrinter::GetTempSymbol(Twine Name) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); - return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ - Name); +MCSymbol *AsmPrinter::createTempSymbol(const Twine &Name) const { + return OutContext.createTempSymbol(Name, true); } - MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const { return MMI->getAddrLabelSymbol(BA->getBasicBlock()); } @@ -2096,8 +2294,8 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { /// GetCPISymbol - Return the symbol for the specified constant pool entry. MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); - return OutContext.GetOrCreateSymbol + const DataLayout *DL = TM.getDataLayout(); + return OutContext.getOrCreateSymbol (Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) + "_" + Twine(CPID)); } @@ -2110,8 +2308,8 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { /// GetJTSetSymbol - Return the symbol for the specified jump table .set /// FIXME: privatize to AsmPrinter. 
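To ground the naming scheme used by these routines: with the ELF private-global prefix ".L", GetCPISymbol turns constant-pool entry 2 of function 3 into ".LCPI3_2", and GetJTSetSymbol below yields set symbols of the form ".L3_1_set_5" (function 3, jump table 1, block 5).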
MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); - return OutContext.GetOrCreateSymbol + const DataLayout *DL = TM.getDataLayout(); + return OutContext.getOrCreateSymbol (Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + Twine(UID) + "_set_" + Twine(MBBID)); } @@ -2127,7 +2325,7 @@ MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { SmallString<60> NameStr; Mang->getNameWithPrefix(NameStr, Sym); - return OutContext.GetOrCreateSymbol(NameStr.str()); + return OutContext.getOrCreateSymbol(NameStr); } @@ -2172,16 +2370,16 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, // If this block is not a loop header, just print out what is the loop header // and return. if (Header != &MBB) { - AP.OutStreamer.AddComment(" in Loop: Header=BB" + - Twine(AP.getFunctionNumber())+"_" + - Twine(Loop->getHeader()->getNumber())+ - " Depth="+Twine(Loop->getLoopDepth())); + AP.OutStreamer->AddComment(" in Loop: Header=BB" + + Twine(AP.getFunctionNumber())+"_" + + Twine(Loop->getHeader()->getNumber())+ + " Depth="+Twine(Loop->getLoopDepth())); return; } // Otherwise, it is a loop header. Print out information about child and // parent loops. - raw_ostream &OS = AP.OutStreamer.GetCommentOS(); + raw_ostream &OS = AP.OutStreamer->GetCommentOS(); PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber()); @@ -2212,18 +2410,18 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { if (MBB.hasAddressTaken()) { const BasicBlock *BB = MBB.getBasicBlock(); if (isVerbose()) - OutStreamer.AddComment("Block address taken"); + OutStreamer->AddComment("Block address taken"); std::vector<MCSymbol*> Symbols = MMI->getAddrLabelSymbolToEmit(BB); for (auto *Sym : Symbols) - OutStreamer.EmitLabel(Sym); + OutStreamer->EmitLabel(Sym); } // Print some verbose block comments. if (isVerbose()) { if (const BasicBlock *BB = MBB.getBasicBlock()) if (BB->hasName()) - OutStreamer.AddComment("%" + BB->getName()); + OutStreamer->AddComment("%" + BB->getName()); emitBasicBlockLoopComments(MBB, LI, *this); } @@ -2231,10 +2429,10 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { if (MBB.pred_empty() || isBlockOnlyReachableByFallthrough(&MBB)) { if (isVerbose()) { // NOTE: Want this comment at start of line, don't emit with AddComment. - OutStreamer.emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false); + OutStreamer->emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false); } } else { - OutStreamer.EmitLabel(MBB.getSymbol()); + OutStreamer->EmitLabel(MBB.getSymbol()); } } @@ -2256,7 +2454,7 @@ void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, } if (Attr != MCSA_Invalid) - OutStreamer.EmitSymbolAttribute(Sym, Attr); + OutStreamer->EmitSymbolAttribute(Sym, Attr); } /// isBlockOnlyReachableByFallthough - Return true if the basic block has @@ -2335,3 +2533,5 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { /// Pin vtable to this file. 
AsmPrinterHandler::~AsmPrinterHandler() {} + +void AsmPrinterHandler::markFunctionEnd() {} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 00681f6..3258961 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -12,45 +12,29 @@ //===----------------------------------------------------------------------===// #include "ByteStreamer.h" +#include "DwarfDebug.h" #include "DwarfExpression.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "asm-printer" -void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) { - BS.EmitInt8( - Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op) - : dwarf::OperationEncodingString(Op)); -} - -void DebugLocDwarfExpression::EmitSigned(int Value) { - BS.EmitSLEB128(Value, Twine(Value)); -} - -void DebugLocDwarfExpression::EmitUnsigned(unsigned Value) { - BS.EmitULEB128(Value, Twine(Value)); -} - -bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) { - // This information is not available while emitting .debug_loc entries. - return false; -} - //===----------------------------------------------------------------------===// // Dwarf Emission Helper Routines //===----------------------------------------------------------------------===// @@ -58,30 +42,30 @@ bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) { /// EmitSLEB128 - emit the specified signed leb128 value. void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { if (isVerbose() && Desc) - OutStreamer.AddComment(Desc); + OutStreamer->AddComment(Desc); - OutStreamer.EmitSLEB128IntValue(Value); + OutStreamer->EmitSLEB128IntValue(Value); } /// EmitULEB128 - emit the specified signed leb128 value. void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const { if (isVerbose() && Desc) - OutStreamer.AddComment(Desc); + OutStreamer->AddComment(Desc); - OutStreamer.EmitULEB128IntValue(Value, PadTo); + OutStreamer->EmitULEB128IntValue(Value, PadTo); } /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value. 
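Because the two emitters above simply forward to the streamer, the encoding itself is easy to verify standalone; a self-contained sketch using the same llvm/Support/LEB128.h helpers the buffered streamer later in this patch builds on (624485 is the DWARF specification's worked example; the demo function is ours):

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void demoULEB128() {
  SmallString<8> Buf;
  raw_svector_ostream OS(Buf);
  // Seven payload bits per byte, least significant group first, high bit
  // set on every byte but the last.
  encodeULEB128(624485, OS);
  OS.flush(); // Buf now holds 0xE5 0x8E 0x26
}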
void AsmPrinter::EmitCFAByte(unsigned Val) const { if (isVerbose()) { if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset + 64) - OutStreamer.AddComment("DW_CFA_offset + Reg (" + - Twine(Val - dwarf::DW_CFA_offset) + ")"); + OutStreamer->AddComment("DW_CFA_offset + Reg (" + + Twine(Val - dwarf::DW_CFA_offset) + ")"); else - OutStreamer.AddComment(dwarf::CallFrameString(Val)); + OutStreamer->AddComment(dwarf::CallFrameString(Val)); } - OutStreamer.EmitIntValue(Val, 1); + OutStreamer->EmitIntValue(Val, 1); } static const char *DecodeDWARFEncoding(unsigned Encoding) { @@ -132,13 +116,13 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) { void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const { if (isVerbose()) { if (Desc) - OutStreamer.AddComment(Twine(Desc) + " Encoding = " + - Twine(DecodeDWARFEncoding(Val))); + OutStreamer->AddComment(Twine(Desc) + " Encoding = " + + Twine(DecodeDWARFEncoding(Val))); else - OutStreamer.AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val)); + OutStreamer->AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val)); } - OutStreamer.EmitIntValue(Val, 1); + OutStreamer->EmitIntValue(Val, 1); } /// GetSizeOfEncodedValue - Return the size of the encoding in bytes. @@ -150,7 +134,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { default: llvm_unreachable("Invalid encoded value."); case dwarf::DW_EH_PE_absptr: - return TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); + return TM.getDataLayout()->getPointerSize(); case dwarf::DW_EH_PE_udata2: return 2; case dwarf::DW_EH_PE_udata4: @@ -166,10 +150,11 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, const TargetLoweringObjectFile &TLOF = getObjFileLowering(); const MCExpr *Exp = - TLOF.getTTypeGlobalReference(GV, Encoding, *Mang, TM, MMI, OutStreamer); - OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding)); + TLOF.getTTypeGlobalReference(GV, Encoding, *Mang, TM, MMI, + *OutStreamer); + OutStreamer->EmitValue(Exp, GetSizeOfEncodedValue(Encoding)); } else - OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding)); + OutStreamer->EmitIntValue(0, GetSizeOfEncodedValue(Encoding)); } /// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its @@ -179,63 +164,43 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, /// /// SectionLabel is a temporary label emitted at the start of the section that /// Label lives in. -void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, - const MCSymbol *SectionLabel) const { +void AsmPrinter::emitSectionOffset(const MCSymbol *Label) const { // On COFF targets, we have to emit the special .secrel32 directive. if (MAI->needsDwarfSectionOffsetDirective()) { - OutStreamer.EmitCOFFSecRel32(Label); + OutStreamer->EmitCOFFSecRel32(Label); return; } - // Get the section that we're referring to, based on SectionLabel. - const MCSection &Section = SectionLabel->getSection(); - - // If Label has already been emitted, verify that it is in the same section as - // section label for sanity. - assert((!Label->isInSection() || &Label->getSection() == &Section) && - "Section offset using wrong section base for label"); - - // If the section in question will end up with an address of 0 anyway, we can - // just emit an absolute reference to save a relocation. - if (Section.isBaseAddressKnownZero()) { - OutStreamer.EmitSymbolValue(Label, 4); + // If the format uses relocations with dwarf, refer to the symbol directly. 
+ if (MAI->doesDwarfUseRelocationsAcrossSections()) { + OutStreamer->EmitSymbolValue(Label, 4); return; } // Otherwise, emit it as a label difference from the start of the section. - EmitLabelDifference(Label, SectionLabel, 4); + EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4); } -// Some targets do not provide a DWARF register number for every -// register. This function attempts to emit a DWARF register by -// emitting a piece of a super-register or by piecing together -// multiple subregisters that alias the register. -void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer, - const MachineLocation &MLoc, - unsigned PieceSizeInBits, - unsigned PieceOffsetInBits) const { - assert(MLoc.isReg() && "MLoc must be a register"); - DebugLocDwarfExpression Expr(*this, Streamer); - Expr.AddMachineRegPiece(MLoc.getReg(), PieceSizeInBits, PieceOffsetInBits); -} +void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const { + if (MAI->doesDwarfUseRelocationsAcrossSections()) { + emitSectionOffset(S.getSymbol()); + return; + } -void AsmPrinter::EmitDwarfOpPiece(ByteStreamer &Streamer, - unsigned PieceSizeInBits, - unsigned PieceOffsetInBits) const { - DebugLocDwarfExpression Expr(*this, Streamer); - Expr.AddOpPiece(PieceSizeInBits, PieceOffsetInBits); + // Just emit the offset directly; no need for symbol math. + EmitInt32(S.getOffset()); } /// EmitDwarfRegOp - Emit dwarf register operation. void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, - const MachineLocation &MLoc, - bool Indirect) const { - DebugLocDwarfExpression Expr(*this, Streamer); - const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); - int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); + const MachineLocation &MLoc) const { + DebugLocDwarfExpression Expr(*MF->getSubtarget().getRegisterInfo(), + getDwarfDebug()->getDwarfVersion(), Streamer); + const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo(); + int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false); if (Reg < 0) { // We assume that pointers are always in an addressable register. - if (Indirect || MLoc.isIndirect()) + if (MLoc.isIndirect()) // FIXME: We have no reasonable way of handling errors in here. The // caller might be in the middle of a dwarf expression. 
We should // probably assert that Reg >= 0 once debug info generation is more @@ -251,9 +216,7 @@ void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, } if (MLoc.isIndirect()) - Expr.AddRegIndirect(Reg, MLoc.getOffset(), Indirect); - else if (Indirect) - Expr.AddRegIndirect(Reg, 0, false); + Expr.AddRegIndirect(Reg, MLoc.getOffset()); else Expr.AddReg(Reg); } @@ -267,25 +230,82 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { default: llvm_unreachable("Unexpected instruction"); case MCCFIInstruction::OpDefCfaOffset: - OutStreamer.EmitCFIDefCfaOffset(Inst.getOffset()); + OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset()); break; case MCCFIInstruction::OpDefCfa: - OutStreamer.EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); + OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); break; case MCCFIInstruction::OpDefCfaRegister: - OutStreamer.EmitCFIDefCfaRegister(Inst.getRegister()); + OutStreamer->EmitCFIDefCfaRegister(Inst.getRegister()); break; case MCCFIInstruction::OpOffset: - OutStreamer.EmitCFIOffset(Inst.getRegister(), Inst.getOffset()); + OutStreamer->EmitCFIOffset(Inst.getRegister(), Inst.getOffset()); break; case MCCFIInstruction::OpRegister: - OutStreamer.EmitCFIRegister(Inst.getRegister(), Inst.getRegister2()); + OutStreamer->EmitCFIRegister(Inst.getRegister(), Inst.getRegister2()); break; case MCCFIInstruction::OpWindowSave: - OutStreamer.EmitCFIWindowSave(); + OutStreamer->EmitCFIWindowSave(); break; case MCCFIInstruction::OpSameValue: - OutStreamer.EmitCFISameValue(Inst.getRegister()); + OutStreamer->EmitCFISameValue(Inst.getRegister()); break; } } + +void AsmPrinter::emitDwarfDIE(const DIE &Die) const { + // Get the abbreviation for this DIE. + const DIEAbbrev &Abbrev = Die.getAbbrev(); + + // Emit the code (index) for the abbreviation. + if (isVerbose()) + OutStreamer->AddComment("Abbrev [" + Twine(Abbrev.getNumber()) + + "] 0x" + Twine::utohexstr(Die.getOffset()) + + ":0x" + Twine::utohexstr(Die.getSize()) + " " + + dwarf::TagString(Abbrev.getTag())); + EmitULEB128(Abbrev.getNumber()); + + const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); + + // Emit the DIE attribute values. + for (unsigned i = 0, N = Values.size(); i < N; ++i) { + dwarf::Attribute Attr = AbbrevData[i].getAttribute(); + dwarf::Form Form = AbbrevData[i].getForm(); + assert(Form && "Too many attributes for DIE (check abbreviation)"); + + if (isVerbose()) { + OutStreamer->AddComment(dwarf::AttributeString(Attr)); + if (Attr == dwarf::DW_AT_accessibility) + OutStreamer->AddComment(dwarf::AccessibilityString( + cast<DIEInteger>(Values[i])->getValue())); + } + + // Emit an attribute using the defined form. + Values[i]->EmitValue(this, Form); + } + + // Emit the DIE children if any. + if (Abbrev.hasChildren()) { + for (auto &Child : Die.getChildren()) + emitDwarfDIE(*Child); + + OutStreamer->AddComment("End Of Children Mark"); + EmitInt8(0); + } +} + +void +AsmPrinter::emitDwarfAbbrevs(const std::vector<DIEAbbrev *>& Abbrevs) const { + // For each abbrevation. + for (const DIEAbbrev *Abbrev : Abbrevs) { + // Emit the abbrevations code (base 1 index.) + EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); + + // Emit the abbreviations data. + Abbrev->Emit(this); + } + + // Mark end of abbreviations. 
+ EmitULEB128(0, "EOM(3)"); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h index 31867dd..f1efe9d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h @@ -41,6 +41,10 @@ public: /// call. virtual void beginFunction(const MachineFunction *MF) = 0; + // \brief Emit any of function marker (like .cfi_endproc). This is called + // before endFunction and cannot switch sections. + virtual void markFunctionEnd(); + /// \brief Gather post-function debug information. /// Please note that some AsmPrinter implementations may not call /// beginFunction at all. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index f6ce4a0..e7631dd 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -32,6 +32,7 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -72,7 +73,9 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { } /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. -void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, +void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, + const MCTargetOptions &MCOptions, + const MDNode *LocMDNode, InlineAsm::AsmDialect Dialect) const { assert(!Str.empty() && "Can't emit empty inline asm block"); @@ -89,10 +92,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, const MCAsmInfo *MCAI = TM.getMCAsmInfo(); assert(MCAI && "No MCAsmInfo"); if (!MCAI->useIntegratedAssembler() && - !OutStreamer.isIntegratedAssemblerRequired()) { - emitInlineAsmStart(TM.getSubtarget<MCSubtargetInfo>()); - OutStreamer.EmitRawText(Str); - emitInlineAsmEnd(TM.getSubtarget<MCSubtargetInfo>(), nullptr); + !OutStreamer->isIntegratedAssemblerRequired()) { + emitInlineAsmStart(); + OutStreamer->EmitRawText(Str); + emitInlineAsmEnd(STI, nullptr); return; } @@ -122,27 +125,21 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); std::unique_ptr<MCAsmParser> Parser( - createMCAsmParser(SrcMgr, OutContext, OutStreamer, *MAI)); - - // Initialize the parser with a fresh subtarget info. It is better to use a - // new STI here because the parser may modify it and we do not want those - // modifications to persist after parsing the inlineasm. The modifications - // made by the parser will be seen by the code emitters because it passes - // the current STI down to the EncodeInstruction() method. - std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( - TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString())); - - // Preserve a copy of the original STI because the parser may modify it. For - // example, when switching between arm and thumb mode. If the target needs to - // emit code to return to the original state it can do so in - // emitInlineAsmEnd(). 
- MCSubtargetInfo STIOrig = *STI; + createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI)); - MCTargetOptions MCOptions; - if (MF) - MCOptions = MF->getTarget().Options.MCOptions; - std::unique_ptr<MCTargetAsmParser> TAP( - TM.getTarget().createMCAsmParser(*STI, *Parser, *MII, MCOptions)); + // Create a temporary copy of the original STI because the parser may modify + // it. For example, when switching between arm and thumb mode. If the target + // needs to emit code to return to the original state it can do so in + // emitInlineAsmEnd(). + MCSubtargetInfo TmpSTI = STI; + + // We create a new MCInstrInfo here since we might be at the module level + // and not have a MachineFunction to initialize the TargetInstrInfo from and + // we only need MCInstrInfo for asm parsing. We create one unconditionally + // because it's not subtarget dependent. + std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo()); + std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser( + TmpSTI, *Parser, *MII, MCOptions)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); @@ -153,11 +150,11 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, TAP->SetFrameRegister(TRI->getFrameRegister(*MF)); } - emitInlineAsmStart(STIOrig); + emitInlineAsmStart(); // Don't implicitly switch to the text section before the asm. int Res = Parser->Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true); - emitInlineAsmEnd(STIOrig, STI.get()); + emitInlineAsmEnd(STI, &TmpSTI); if (Res && !HasDiagHandler) report_fatal_error("Error parsing inline asm\n"); } @@ -452,14 +449,14 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // If this asmstr is empty, just print the #APP/#NOAPP markers. // These are useful to see where empty asm's wound up. if (AsmStr[0] == 0) { - OutStreamer.emitRawComment(MAI->getInlineAsmStart()); - OutStreamer.emitRawComment(MAI->getInlineAsmEnd()); + OutStreamer->emitRawComment(MAI->getInlineAsmStart()); + OutStreamer->emitRawComment(MAI->getInlineAsmEnd()); return; } // Emit the #APP start marker. This has to happen even if verbose-asm isn't // enabled, so we use emitRawComment. - OutStreamer.emitRawComment(MAI->getInlineAsmStart()); + OutStreamer->emitRawComment(MAI->getInlineAsmStart()); // Get the !srcloc metadata node if we have it, and decode the loc cookie from // it. @@ -492,11 +489,17 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { else EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS); - EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect()); + // Reset SanitizeAddress based on the function's attribute. + MCTargetOptions MCOptions = TM.Options.MCOptions; + MCOptions.SanitizeAddress = + MF->getFunction()->hasFnAttribute(Attribute::SanitizeAddress); + + EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD, + MI->getInlineAsmDialect()); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't // enabled, so we use emitRawComment. - OutStreamer.emitRawComment(MAI->getInlineAsmEnd()); + OutStreamer->emitRawComment(MAI->getInlineAsmEnd()); } @@ -508,7 +511,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { /// for their own strange codes. 
void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, const char *Code) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = TM.getDataLayout(); if (!strcmp(Code, "private")) { OS << DL->getPrivateGlobalPrefix(); } else if (!strcmp(Code, "comment")) { @@ -569,7 +572,7 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, return true; } -void AsmPrinter::emitInlineAsmStart(const MCSubtargetInfo &StartInfo) const {} +void AsmPrinter::emitInlineAsmStart() const {} void AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, const MCSubtargetInfo *EndInfo) const {} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h index 42be114..0cc829f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -19,6 +19,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/LEB128.h" +#include <string> namespace llvm { class ByteStreamer { @@ -38,15 +40,15 @@ private: public: APByteStreamer(AsmPrinter &Asm) : AP(Asm) {} void EmitInt8(uint8_t Byte, const Twine &Comment) override { - AP.OutStreamer.AddComment(Comment); + AP.OutStreamer->AddComment(Comment); AP.EmitInt8(Byte); } void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { - AP.OutStreamer.AddComment(Comment); + AP.OutStreamer->AddComment(Comment); AP.EmitSLEB128(DWord); } void EmitULEB128(uint64_t DWord, const Twine &Comment) override { - AP.OutStreamer.AddComment(Comment); + AP.OutStreamer->AddComment(Comment); AP.EmitULEB128(DWord); } }; @@ -66,6 +68,41 @@ class HashingByteStreamer : public ByteStreamer { Hash.addULEB128(DWord); } }; + +class BufferByteStreamer : public ByteStreamer { +private: + SmallVectorImpl<char> &Buffer; + SmallVectorImpl<std::string> &Comments; + + /// \brief Only verbose textual output needs comments. This will be set to + /// true for that case, and false otherwise. If false, comments passed in to + /// the emit methods will be ignored. 
+ bool GenerateComments; + +public: + BufferByteStreamer(SmallVectorImpl<char> &Buffer, + SmallVectorImpl<std::string> &Comments, + bool GenerateComments) + : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {} + void EmitInt8(uint8_t Byte, const Twine &Comment) override { + Buffer.push_back(Byte); + if (GenerateComments) + Comments.push_back(Comment.str()); + } + void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + raw_svector_ostream OSE(Buffer); + encodeSLEB128(DWord, OSE); + if (GenerateComments) + Comments.push_back(Comment.str()); + } + void EmitULEB128(uint64_t DWord, const Twine &Comment) override { + raw_svector_ostream OSE(Buffer); + encodeULEB128(DWord, OSE); + if (GenerateComments) + Comments.push_back(Comment.str()); + } +}; + } #endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 64ba56b..1ccffe9 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -60,7 +61,7 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { /// Emit - Print the abbreviation using the specified asm printer. /// -void DIEAbbrev::Emit(AsmPrinter *AP) const { +void DIEAbbrev::Emit(const AsmPrinter *AP) const { // Emit its Dwarf tag type. AP->EmitULEB128(Tag, dwarf::TagString(Tag)); @@ -190,9 +191,67 @@ void DIE::dump() { } #endif -void DIEValue::anchor() { } +void DIEValue::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + switch (Ty) { +#define EMIT_VALUE_IMPL(Kind) \ + case is##Kind: \ + cast<DIE##Kind>(this)->EmitValueImpl(AP, Form); \ + break; + EMIT_VALUE_IMPL(Integer) + EMIT_VALUE_IMPL(String) + EMIT_VALUE_IMPL(Expr) + EMIT_VALUE_IMPL(Label) + EMIT_VALUE_IMPL(Delta) + EMIT_VALUE_IMPL(Entry) + EMIT_VALUE_IMPL(TypeSignature) + EMIT_VALUE_IMPL(Block) + EMIT_VALUE_IMPL(Loc) + EMIT_VALUE_IMPL(LocList) +#undef EMIT_VALUE_IMPL + } +} + +unsigned DIEValue::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + switch (Ty) { +#define SIZE_OF_IMPL(Kind) \ + case is##Kind: \ + return cast<DIE##Kind>(this)->SizeOfImpl(AP, Form); + SIZE_OF_IMPL(Integer) + SIZE_OF_IMPL(String) + SIZE_OF_IMPL(Expr) + SIZE_OF_IMPL(Label) + SIZE_OF_IMPL(Delta) + SIZE_OF_IMPL(Entry) + SIZE_OF_IMPL(TypeSignature) + SIZE_OF_IMPL(Block) + SIZE_OF_IMPL(Loc) + SIZE_OF_IMPL(LocList) +#undef SIZE_OF_IMPL + } + llvm_unreachable("Unknown DIE kind"); +} #ifndef NDEBUG +void DIEValue::print(raw_ostream &O) const { + switch (Ty) { +#define PRINT_IMPL(Kind) \ + case is##Kind: \ + cast<DIE##Kind>(this)->printImpl(O); \ + break; + PRINT_IMPL(Integer) + PRINT_IMPL(String) + PRINT_IMPL(Expr) + PRINT_IMPL(Label) + PRINT_IMPL(Delta) + PRINT_IMPL(Entry) + PRINT_IMPL(TypeSignature) + PRINT_IMPL(Block) + PRINT_IMPL(Loc) + PRINT_IMPL(LocList) +#undef PRINT_IMPL + } +} + void DIEValue::dump() const { print(dbgs()); } @@ -204,13 +263,13 @@ void DIEValue::dump() const { /// EmitValue - Emit integer of appropriate size. /// -void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { +void DIEInteger::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const { unsigned Size = ~0U; switch (Form) { case dwarf::DW_FORM_flag_present: // Emit something to keep the lines and comments in sync. 
// FIXME: Is there a better way to do this? - Asm->OutStreamer.AddBlankLine(); + Asm->OutStreamer->AddBlankLine(); return; case dwarf::DW_FORM_flag: // Fall thru case dwarf::DW_FORM_ref1: // Fall thru @@ -218,6 +277,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_ref2: // Fall thru case dwarf::DW_FORM_data2: Size = 2; break; case dwarf::DW_FORM_sec_offset: // Fall thru + case dwarf::DW_FORM_strp: // Fall thru case dwarf::DW_FORM_ref4: // Fall thru case dwarf::DW_FORM_data4: Size = 4; break; case dwarf::DW_FORM_ref8: // Fall thru @@ -229,14 +289,17 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; case dwarf::DW_FORM_addr: Size = Asm->getDataLayout().getPointerSize(); break; + case dwarf::DW_FORM_ref_addr: + Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr); + break; default: llvm_unreachable("DIE Value form not supported yet"); } - Asm->OutStreamer.EmitIntValue(Integer, Size); + Asm->OutStreamer->EmitIntValue(Integer, Size); } /// SizeOf - Determine size of integer value in bytes. /// -unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEInteger::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_flag_present: return 0; case dwarf::DW_FORM_flag: // Fall thru @@ -245,6 +308,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_ref2: // Fall thru case dwarf::DW_FORM_data2: return sizeof(int16_t); case dwarf::DW_FORM_sec_offset: // Fall thru + case dwarf::DW_FORM_strp: // Fall thru case dwarf::DW_FORM_ref4: // Fall thru case dwarf::DW_FORM_data4: return sizeof(int32_t); case dwarf::DW_FORM_ref8: // Fall thru @@ -255,12 +319,16 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_udata: return getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer); case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(); + case dwarf::DW_FORM_ref_addr: + if (AP->OutStreamer->getContext().getDwarfVersion() == 2) + return AP->getDataLayout().getPointerSize(); + return sizeof(int32_t); default: llvm_unreachable("DIE Value form not supported yet"); } } #ifndef NDEBUG -void DIEInteger::print(raw_ostream &O) const { +void DIEInteger::printImpl(raw_ostream &O) const { O << "Int: " << (int64_t)Integer << " 0x"; O.write_hex(Integer); } @@ -272,13 +340,13 @@ void DIEInteger::print(raw_ostream &O) const { /// EmitValue - Emit expression value. /// -void DIEExpr::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { - AP->OutStreamer.EmitValue(Expr, SizeOf(AP, Form)); +void DIEExpr::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { + AP->OutStreamer->EmitValue(Expr, SizeOf(AP, Form)); } /// SizeOf - Determine size of expression value in bytes. 
/// -unsigned DIEExpr::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEExpr::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -286,10 +354,7 @@ unsigned DIEExpr::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIEExpr::print(raw_ostream &O) const { - O << "Expr: "; - Expr->print(O); -} +void DIEExpr::printImpl(raw_ostream &O) const { O << "Expr: " << *Expr; } #endif //===----------------------------------------------------------------------===// @@ -298,7 +363,7 @@ void DIEExpr::print(raw_ostream &O) const { /// EmitValue - Emit label value. /// -void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { +void DIELabel::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelReference(Label, SizeOf(AP, Form), Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_sec_offset || @@ -307,7 +372,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { /// SizeOf - Determine size of label value in bytes. /// -unsigned DIELabel::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIELabel::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -315,7 +380,7 @@ unsigned DIELabel::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIELabel::print(raw_ostream &O) const { +void DIELabel::printImpl(raw_ostream &O) const { O << "Lbl: " << Label->getName(); } #endif @@ -326,13 +391,13 @@ void DIELabel::print(raw_ostream &O) const { /// EmitValue - Emit delta value. /// -void DIEDelta::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { +void DIEDelta::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form)); } /// SizeOf - Determine size of delta value in bytes. /// -unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEDelta::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -340,7 +405,7 @@ unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIEDelta::print(raw_ostream &O) const { +void DIEDelta::printImpl(raw_ostream &O) const { O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName(); } #endif @@ -351,20 +416,50 @@ void DIEDelta::print(raw_ostream &O) const { /// EmitValue - Emit string value. /// -void DIEString::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { - Access->EmitValue(AP, Form); +void DIEString::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { + assert( + (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) && + "Expected valid string form"); + + // Index of string in symbol table. + if (Form == dwarf::DW_FORM_GNU_str_index) { + DIEInteger(S.getIndex()).EmitValue(AP, Form); + return; + } + + // Relocatable symbol. + assert(Form == dwarf::DW_FORM_strp); + if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) { + DIELabel(S.getSymbol()).EmitValue(AP, Form); + return; + } + + // Offset into symbol table. + DIEInteger(S.getOffset()).EmitValue(AP, Form); } /// SizeOf - Determine size of delta value in bytes. 
/// -unsigned DIEString::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { - return Access->SizeOf(AP, Form); +unsigned DIEString::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { + assert( + (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) && + "Expected valid string form"); + + // Index of string in symbol table. + if (Form == dwarf::DW_FORM_GNU_str_index) + return DIEInteger(S.getIndex()).SizeOf(AP, Form); + + // Relocatable symbol. + if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) + return DIELabel(S.getSymbol()).SizeOf(AP, Form); + + // Offset into symbol table. + return DIEInteger(S.getOffset()).SizeOf(AP, Form); } #ifndef NDEBUG -void DIEString::print(raw_ostream &O) const { - O << "String: " << Str << "\tSymbol: "; - Access->print(O); +void DIEString::printImpl(raw_ostream &O) const { + O << "String: " << S.getString(); } #endif @@ -372,32 +467,9 @@ void DIEString::print(raw_ostream &O) const { // DIEEntry Implementation //===----------------------------------------------------------------------===// -/// Emit something like ".long Hi+Offset-Lo" where the size in bytes of the -/// directive is specified by Size and Hi/Lo specify the labels. -static void emitLabelOffsetDifference(MCStreamer &Streamer, const MCSymbol *Hi, - uint64_t Offset, const MCSymbol *Lo, - unsigned Size) { - MCContext &Context = Streamer.getContext(); - - // Emit Hi+Offset - Lo - // Get the Hi+Offset expression. - const MCExpr *Plus = - MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, Context), - MCConstantExpr::Create(Offset, Context), Context); - - // Get the Hi+Offset-Lo expression. - const MCExpr *Diff = MCBinaryExpr::CreateSub( - Plus, MCSymbolRefExpr::Create(Lo, Context), Context); - - // Otherwise, emit with .set (aka assignment). - MCSymbol *SetLabel = Context.CreateTempSymbol(); - Streamer.EmitAssignment(SetLabel, Diff); - Streamer.EmitSymbolValue(SetLabel, Size); -} - /// EmitValue - Emit debug information entry offset. 
/// -void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { +void DIEEntry::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_ref_addr) { const DwarfDebug *DD = AP->getDwarfDebug(); @@ -413,14 +485,12 @@ void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr, DIEEntry::getRefAddrSize(AP)); else - emitLabelOffsetDifference(AP->OutStreamer, CU->getSectionSym(), Addr, - CU->getSectionSym(), - DIEEntry::getRefAddrSize(AP)); + AP->OutStreamer->EmitIntValue(Addr, DIEEntry::getRefAddrSize(AP)); } else AP->EmitInt32(Entry.getOffset()); } -unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) { +unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) { // DWARF4: References that use the attribute form DW_FORM_ref_addr are // specified to be four bytes in the DWARF 32-bit format and eight bytes // in the DWARF 64-bit format, while DWARF Version 2 specifies that such @@ -433,7 +503,7 @@ unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) { } #ifndef NDEBUG -void DIEEntry::print(raw_ostream &O) const { +void DIEEntry::printImpl(raw_ostream &O) const { O << format("Die: 0x%lx", (long)(intptr_t)&Entry); } #endif @@ -441,17 +511,15 @@ void DIEEntry::print(raw_ostream &O) const { //===----------------------------------------------------------------------===// // DIETypeSignature Implementation //===----------------------------------------------------------------------===// -void DIETypeSignature::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { +void DIETypeSignature::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const { assert(Form == dwarf::DW_FORM_ref_sig8); - Asm->OutStreamer.EmitIntValue(Unit.getTypeSignature(), 8); + Asm->OutStreamer->EmitIntValue(Unit.getTypeSignature(), 8); } #ifndef NDEBUG -void DIETypeSignature::print(raw_ostream &O) const { +void DIETypeSignature::printImpl(raw_ostream &O) const { O << format("Type Unit: 0x%lx", Unit.getTypeSignature()); } - -void DIETypeSignature::dump() const { print(dbgs()); } #endif //===----------------------------------------------------------------------===// @@ -460,7 +528,7 @@ void DIETypeSignature::dump() const { print(dbgs()); } /// ComputeSize - calculate the size of the location expression. /// -unsigned DIELoc::ComputeSize(AsmPrinter *AP) const { +unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const { if (!Size) { const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) @@ -472,7 +540,7 @@ unsigned DIELoc::ComputeSize(AsmPrinter *AP) const { /// EmitValue - Emit location data. /// -void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { +void DIELoc::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; @@ -490,7 +558,7 @@ void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { /// SizeOf - Determine size of location data in bytes. 
/// -unsigned DIELoc::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIELoc::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); @@ -503,7 +571,7 @@ unsigned DIELoc::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIELoc::print(raw_ostream &O) const { +void DIELoc::printImpl(raw_ostream &O) const { O << "ExprLoc: "; DIE::print(O, 5); } @@ -515,7 +583,7 @@ void DIELoc::print(raw_ostream &O) const { /// ComputeSize - calculate the size of the block. /// -unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const { +unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const { if (!Size) { const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) @@ -527,7 +595,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const { /// EmitValue - Emit block data. /// -void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { +void DIEBlock::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; @@ -543,7 +611,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { /// SizeOf - Determine size of block data in bytes. /// -unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEBlock::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); @@ -554,7 +622,7 @@ unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIEBlock::print(raw_ostream &O) const { +void DIEBlock::printImpl(raw_ostream &O) const { O << "Blk: "; DIE::print(O, 5); } @@ -564,7 +632,7 @@ void DIEBlock::print(raw_ostream &O) const { // DIELocList Implementation //===----------------------------------------------------------------------===// -unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIELocList::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; @@ -574,18 +642,18 @@ unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { /// EmitValue - Emit location list value.
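The DIELoc and DIEBlock SizeOf overloads above all apply the same rule: a DWARF block's on-disk size is its payload plus the length prefix selected by the form. A small self-contained sketch of that arithmetic, with hypothetical names:

#include <cstdint>
#include <iostream>

enum class BlockForm { Block1, Block2, Block4 };

unsigned blockSize(unsigned PayloadBytes, BlockForm F) {
  switch (F) {
  case BlockForm::Block1: return PayloadBytes + sizeof(int8_t);  // 1-byte length prefix
  case BlockForm::Block2: return PayloadBytes + sizeof(int16_t); // 2-byte length prefix
  case BlockForm::Block4: return PayloadBytes + sizeof(int32_t); // 4-byte length prefix
  }
  return 0;
}

int main() {
  std::cout << blockSize(10, BlockForm::Block1) << "\n"; // 11
  std::cout << blockSize(10, BlockForm::Block4) << "\n"; // 14
}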
/// -void DIELocList::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { +void DIELocList::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { DwarfDebug *DD = AP->getDwarfDebug(); - MCSymbol *Label = DD->getDebugLocEntries()[Index].Label; + MCSymbol *Label = DD->getDebugLocs().getList(Index).Label; if (AP->MAI->doesDwarfUseRelocationsAcrossSections() && !DD->useSplitDwarf()) - AP->EmitSectionOffset(Label, DD->getDebugLocSym()); + AP->emitSectionOffset(Label); else - AP->EmitLabelDifference(Label, DD->getDebugLocSym(), 4); + AP->EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4); } #ifndef NDEBUG -void DIELocList::print(raw_ostream &O) const { +void DIELocList::printImpl(raw_ostream &O) const { O << "LocList: " << Index; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 1e2ba2c..a2e5aad 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -285,8 +285,8 @@ void DIEHash::hashBlockData(const SmallVectorImpl<DIEValue *> &Values) { void DIEHash::hashLocList(const DIELocList &LocList) { HashingByteStreamer Streamer(*this); DwarfDebug &DD = *AP->getDwarfDebug(); - for (const auto &Entry : - DD.getDebugLocEntries()[LocList.getValue()].List) + const DebugLocStream &Locs = DD.getDebugLocs(); + for (const auto &Entry : Locs.getEntries(Locs.getList(LocList.getValue()))) DD.emitDebugLocEntry(Streamer, Entry); } @@ -510,7 +510,7 @@ uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) { // ... take the least significant 8 bytes and return those. Our MD5 // implementation always returns its results in little endian, swap bytes // appropriately. - return *reinterpret_cast<support::ulittle64_t *>(Result + 8); + return support::endian::read64le(Result + 8); } /// This is based on the type signature computation given in section 7.27 of the @@ -531,7 +531,7 @@ uint64_t DIEHash::computeCUSignature(const DIE &Die) { // ... take the least significant 8 bytes and return those. Our MD5 // implementation always returns its results in little endian, swap bytes // appropriately. - return *reinterpret_cast<support::ulittle64_t *>(Result + 8); + return support::endian::read64le(Result + 8); } /// This is based on the type signature computation given in section 7.27 of the @@ -555,5 +555,5 @@ uint64_t DIEHash::computeTypeSignature(const DIE &Die) { // ... take the least significant 8 bytes and return those. Our MD5 // implementation always returns its results in little endian, swap bytes // appropriately. - return *reinterpret_cast<support::ulittle64_t *>(Result + 8); + return support::endian::read64le(Result + 8); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index 0c2a5e5..3c46a99 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> #include <map> @@ -32,7 +33,7 @@ static unsigned isDescribedByReg(const MachineInstr &MI) { return MI.getOperand(0).isReg() ? 
MI.getOperand(0).getReg() : 0; } -void DbgValueHistoryMap::startInstrRange(const MDNode *Var, +void DbgValueHistoryMap::startInstrRange(InlinedVariable Var, const MachineInstr &MI) { // Instruction range should start with a DBG_VALUE instruction for the // variable. @@ -47,7 +48,7 @@ void DbgValueHistoryMap::startInstrRange(const MDNode *Var, Ranges.push_back(std::make_pair(&MI, nullptr)); } -void DbgValueHistoryMap::endInstrRange(const MDNode *Var, +void DbgValueHistoryMap::endInstrRange(InlinedVariable Var, const MachineInstr &MI) { auto &Ranges = VarInstrRanges[Var]; // Verify that the current instruction range is not yet closed. @@ -58,7 +59,7 @@ void DbgValueHistoryMap::endInstrRange(const MDNode *Var, Ranges.back().second = &MI; } -unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const { +unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const { const auto &I = VarInstrRanges.find(Var); if (I == VarInstrRanges.end()) return 0; @@ -70,12 +71,13 @@ unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const { namespace { // Maps physreg numbers to the variables they describe. -typedef std::map<unsigned, SmallVector<const MDNode *, 1>> RegDescribedVarsMap; +typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; +typedef std::map<unsigned, SmallVector<InlinedVariable, 1>> RegDescribedVarsMap; } // \brief Claim that @Var is not described by @RegNo anymore. -static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, - unsigned RegNo, const MDNode *Var) { +static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, + InlinedVariable Var) { const auto &I = RegVars.find(RegNo); assert(RegNo != 0U && I != RegVars.end()); auto &VarSet = I->second; @@ -88,8 +90,8 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, } // \brief Claim that @Var is now described by @RegNo. -static void addRegDescribedVar(RegDescribedVarsMap &RegVars, - unsigned RegNo, const MDNode *Var) { +static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, + InlinedVariable Var) { assert(RegNo != 0U); auto &VarSet = RegVars[RegNo]; assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end()); @@ -202,7 +204,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, // Use the base variable (without any DW_OP_piece expressions) // as index into History. The full variables including the // piece expressions are attached to the MI. 
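The switch above from const MDNode * keys to InlinedVariable pairs matters because one source variable inlined at two different call sites must get two distinct location histories. A minimal sketch of that keying, using stand-in types for DILocalVariable and DILocation:

#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-ins for DILocalVariable / DILocation.
struct Var { std::string Name; };
struct Loc { int Line; };

using InlinedVariable = std::pair<const Var *, const Loc *>;

int main() {
  Var X{"x"};
  Loc CallA{10}, CallB{20};
  // The same source variable inlined at two call sites gets two
  // independent histories because the key includes the inlined-at node.
  std::map<InlinedVariable, std::vector<int>> History;
  History[{&X, &CallA}].push_back(1);
  History[{&X, &CallB}].push_back(2);
  std::cout << History.size() << "\n"; // prints 2
}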
- DIVariable Var = MI.getDebugVariable(); + const DILocalVariable *RawVar = MI.getDebugVariable(); + assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) && + "Expected inlined-at fields to agree"); + InlinedVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt()); if (unsigned PrevReg = Result.getRegisterForVar(Var)) dropRegDescribedVar(RegVars, PrevReg, Var); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h index 4b62007..546d1b4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h @@ -17,7 +17,8 @@ namespace llvm { class MachineFunction; class MachineInstr; -class MDNode; +class DILocalVariable; +class DILocation; class TargetRegisterInfo; // For each user variable, keep a list of instruction ranges where this variable @@ -31,16 +32,19 @@ class DbgValueHistoryMap { public: typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange; typedef SmallVector<InstrRange, 4> InstrRanges; - typedef MapVector<const MDNode *, InstrRanges> InstrRangesMap; + typedef std::pair<const DILocalVariable *, const DILocation *> + InlinedVariable; + typedef MapVector<InlinedVariable, InstrRanges> InstrRangesMap; + private: InstrRangesMap VarInstrRanges; public: - void startInstrRange(const MDNode *Var, const MachineInstr &MI); - void endInstrRange(const MDNode *Var, const MachineInstr &MI); + void startInstrRange(InlinedVariable Var, const MachineInstr &MI); + void endInstrRange(InlinedVariable Var, const MachineInstr &MI); // Returns register currently describing @Var. If @Var is currently // unaccessible or is not described by a register, returns 0. - unsigned getRegisterForVar(const MDNode *Var) const; + unsigned getRegisterForVar(InlinedVariable Var) const; bool empty() const { return VarInstrRanges.empty(); } void clear() { VarInstrRanges.clear(); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h index b4fcada..6a943c6 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -9,52 +9,51 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H +#include "llvm/ADT/SmallString.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" namespace llvm { -class MDNode; +class AsmPrinter; +class DebugLocStream; + /// \brief This struct describes location entries emitted in the .debug_loc /// section. class DebugLocEntry { - // Begin and end symbols for the address range that this location is valid. + /// Begin and end symbols for the address range that this location is valid. const MCSymbol *Begin; const MCSymbol *End; public: - /// A single location or constant. + /// \brief A single location or constant. 
struct Value { - Value(const MDNode *Var, const MDNode *Expr, int64_t i) - : Variable(Var), Expression(Expr), EntryKind(E_Integer) { + Value(const DIExpression *Expr, int64_t i) + : Expression(Expr), EntryKind(E_Integer) { Constant.Int = i; } - Value(const MDNode *Var, const MDNode *Expr, const ConstantFP *CFP) - : Variable(Var), Expression(Expr), EntryKind(E_ConstantFP) { + Value(const DIExpression *Expr, const ConstantFP *CFP) + : Expression(Expr), EntryKind(E_ConstantFP) { Constant.CFP = CFP; } - Value(const MDNode *Var, const MDNode *Expr, const ConstantInt *CIP) - : Variable(Var), Expression(Expr), EntryKind(E_ConstantInt) { + Value(const DIExpression *Expr, const ConstantInt *CIP) + : Expression(Expr), EntryKind(E_ConstantInt) { Constant.CIP = CIP; } - Value(const MDNode *Var, const MDNode *Expr, MachineLocation Loc) - : Variable(Var), Expression(Expr), EntryKind(E_Location), Loc(Loc) { - assert(DIVariable(Var).Verify()); - assert(DIExpression(Expr).Verify()); + Value(const DIExpression *Expr, MachineLocation Loc) + : Expression(Expr), EntryKind(E_Location), Loc(Loc) { + assert(cast<DIExpression>(Expr)->isValid()); } - // The variable to which this location entry corresponds. - const MDNode *Variable; - - // Any complex address location expression for this Value. - const MDNode *Expression; + /// Any complex address location expression for this Value. + const DIExpression *Expression; - // Type of entry that this represents. + /// Type of entry that this represents. enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt }; enum EntryType EntryKind; - // Either a constant, + /// Either a constant, union { int64_t Int; const ConstantFP *CFP; @@ -72,10 +71,8 @@ public: const ConstantFP *getConstantFP() const { return Constant.CFP; } const ConstantInt *getConstantInt() const { return Constant.CIP; } MachineLocation getLoc() const { return Loc; } - const MDNode *getVariableNode() const { return Variable; } - DIVariable getVariable() const { return DIVariable(Variable); } - bool isVariablePiece() const { return getExpression().isVariablePiece(); } - DIExpression getExpression() const { return DIExpression(Expression); } + bool isBitPiece() const { return getExpression()->isBitPiece(); } + const DIExpression *getExpression() const { return Expression; } friend bool operator==(const Value &, const Value &); friend bool operator<(const Value &, const Value &); }; @@ -92,17 +89,14 @@ public: } /// \brief If this and Next are describing different pieces of the same - // variable, merge them by appending Next's values to the current - // list of values. - // Return true if the merge was successful. + /// variable, merge them by appending Next's values to the current + /// list of values. + /// Return true if the merge was successful. 
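DebugLocEntry::Value above is a classic tagged union: EntryKind records which union member is active, and each accessor is only meaningful for the matching kind. A compilable miniature of that layout, with opaque stand-ins for ConstantFP and ConstantInt:

#include <cassert>
#include <cstdint>

struct ConstantFP;
struct ConstantInt;

struct LocValue {
  enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
  EntryType EntryKind;
  union {
    int64_t Int;
    const ConstantFP *CFP;
    const ConstantInt *CIP;
  } Constant;

  explicit LocValue(int64_t I) : EntryKind(E_Integer) { Constant.Int = I; }

  bool isInt() const { return EntryKind == E_Integer; }
  int64_t getInt() const {
    assert(isInt() && "reading an inactive union member");
    return Constant.Int;
  }
};

int main() {
  LocValue V(42);
  assert(V.getInt() == 42);
}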
bool MergeValues(const DebugLocEntry &Next) { if (Begin == Next.Begin) { - DIExpression Expr(Values[0].Expression); - DIVariable Var(Values[0].Variable); - DIExpression NextExpr(Next.Values[0].Expression); - DIVariable NextVar(Next.Values[0].Variable); - if (Var == NextVar && Expr.isVariablePiece() && - NextExpr.isVariablePiece()) { + auto *Expr = cast_or_null<DIExpression>(Values[0].Expression); + auto *NextExpr = cast_or_null<DIExpression>(Next.Values[0].Expression); + if (Expr->isBitPiece() && NextExpr->isBitPiece()) { addValues(Next.Values); End = Next.End; return true; @@ -131,24 +125,28 @@ public: Values.append(Vals.begin(), Vals.end()); sortUniqueValues(); assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value V){ - return V.isVariablePiece(); + return V.isBitPiece(); }) && "value must be a piece"); } - // Sort the pieces by offset. + // \brief Sort the pieces by offset. // Remove any duplicate entries by dropping all but the first. void sortUniqueValues() { std::sort(Values.begin(), Values.end()); - Values.erase(std::unique(Values.begin(), Values.end(), - [](const Value &A, const Value &B) { - return A.getVariable() == B.getVariable() && - A.getExpression() == B.getExpression(); - }), - Values.end()); + Values.erase( + std::unique( + Values.begin(), Values.end(), [](const Value &A, const Value &B) { + return A.getExpression() == B.getExpression(); + }), + Values.end()); } + + /// \brief Lower this entry into a DWARF expression. + void finalize(const AsmPrinter &AP, DebugLocStream &Locs, + const DIBasicType *BT); }; -/// Compare two Values for equality. +/// \brief Compare two Values for equality. inline bool operator==(const DebugLocEntry::Value &A, const DebugLocEntry::Value &B) { if (A.EntryKind != B.EntryKind) @@ -157,9 +155,6 @@ inline bool operator==(const DebugLocEntry::Value &A, if (A.Expression != B.Expression) return false; - if (A.Variable != B.Variable) - return false; - switch (A.EntryKind) { case DebugLocEntry::Value::E_Location: return A.Loc == B.Loc; @@ -173,11 +168,11 @@ inline bool operator==(const DebugLocEntry::Value &A, llvm_unreachable("unhandled EntryKind"); } -/// Compare two pieces based on their offset. +/// \brief Compare two pieces based on their offset. inline bool operator<(const DebugLocEntry::Value &A, const DebugLocEntry::Value &B) { - return A.getExpression().getPieceOffset() < - B.getExpression().getPieceOffset(); + return A.getExpression()->getBitPieceOffset() < + B.getExpression()->getBitPieceOffset(); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocList.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocList.h deleted file mode 100644 index 0f1d2ed..0000000 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocList.h +++ /dev/null @@ -1,25 +0,0 @@ -//===--- lib/CodeGen/DebugLocList.h - DWARF debug_loc list ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
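sortUniqueValues() above orders the pieces by offset and then drops later duplicates that share an expression. The same sort/unique pattern in standalone form; Piece is a hypothetical stand-in:

#include <algorithm>
#include <iostream>
#include <vector>

struct Piece {
  unsigned BitOffset;
  const void *Expr; // stand-in for the DIExpression* identity
};

void sortUnique(std::vector<Piece> &Vals) {
  // Order by offset so duplicates become adjacent...
  std::sort(Vals.begin(), Vals.end(),
            [](const Piece &A, const Piece &B) { return A.BitOffset < B.BitOffset; });
  // ...then keep only the first of each run with the same expression.
  Vals.erase(std::unique(Vals.begin(), Vals.end(),
                         [](const Piece &A, const Piece &B) {
                           return A.Expr == B.Expr;
                         }),
             Vals.end());
}

int main() {
  int E0, E1;
  std::vector<Piece> V{{32, &E1}, {0, &E0}, {0, &E0}};
  sortUnique(V);
  std::cout << V.size() << "\n"; // 2, sorted by offset
}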
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H -#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H - -#include "DebugLocEntry.h" -#include "llvm/ADT/SmallVector.h" - -namespace llvm { -class DwarfCompileUnit; -class MCSymbol; -struct DebugLocList { - MCSymbol *Label; - DwarfCompileUnit *CU; - SmallVector<DebugLocEntry, 4> List; -}; -} -#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h new file mode 100644 index 0000000..3001da2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -0,0 +1,133 @@ +//===--- lib/CodeGen/DebugLocStream.h - DWARF debug_loc stream --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "ByteStreamer.h" + +namespace llvm { +class DwarfCompileUnit; +class MCSymbol; + +/// \brief Byte stream of .debug_loc entries. +/// +/// Stores a unified stream of .debug_loc entries. There's \a List for each +/// variable/inlined-at pair, and an \a Entry for each \a DebugLocEntry. +/// +/// FIXME: Do we need all these temp symbols? +/// FIXME: Why not output directly to the output stream? +class DebugLocStream { +public: + struct List { + DwarfCompileUnit *CU; + MCSymbol *Label; + size_t EntryOffset; + List(DwarfCompileUnit *CU, MCSymbol *Label, size_t EntryOffset) + : CU(CU), Label(Label), EntryOffset(EntryOffset) {} + }; + struct Entry { + const MCSymbol *BeginSym; + const MCSymbol *EndSym; + size_t ByteOffset; + size_t CommentOffset; + Entry(const MCSymbol *BeginSym, const MCSymbol *EndSym, size_t ByteOffset, + size_t CommentOffset) + : BeginSym(BeginSym), EndSym(EndSym), ByteOffset(ByteOffset), + CommentOffset(CommentOffset) {} + }; + +private: + SmallVector<List, 4> Lists; + SmallVector<Entry, 32> Entries; + SmallString<256> DWARFBytes; + SmallVector<std::string, 32> Comments; + + /// \brief Only verbose textual output needs comments. This will be set to + /// true for that case, and false otherwise. + bool GenerateComments; + +public: + DebugLocStream(bool GenerateComments) : GenerateComments(GenerateComments) { } + size_t getNumLists() const { return Lists.size(); } + const List &getList(size_t LI) const { return Lists[LI]; } + ArrayRef<List> getLists() const { return Lists; } + + /// \brief Start a new .debug_loc entry list. + /// + /// Start a new .debug_loc entry list. Return the new list's index so it can + /// be retrieved later via \a getList(). + /// + /// Until the next call, \a startEntry() will add entries to this list. + size_t startList(DwarfCompileUnit *CU, MCSymbol *Label) { + size_t LI = Lists.size(); + Lists.emplace_back(CU, Label, Entries.size()); + return LI; + } + + /// \brief Start a new .debug_loc entry. + /// + /// Until the next call, bytes added to the stream will be added to this + /// entry. 
+ void startEntry(const MCSymbol *BeginSym, const MCSymbol *EndSym) { + Entries.emplace_back(BeginSym, EndSym, DWARFBytes.size(), Comments.size()); + } + + BufferByteStreamer getStreamer() { + return BufferByteStreamer(DWARFBytes, Comments, GenerateComments); + } + + ArrayRef<Entry> getEntries(const List &L) const { + size_t LI = getIndex(L); + return makeArrayRef(Entries) + .slice(Lists[LI].EntryOffset, getNumEntries(LI)); + } + + ArrayRef<char> getBytes(const Entry &E) const { + size_t EI = getIndex(E); + return makeArrayRef(DWARFBytes.begin(), DWARFBytes.end()) + .slice(Entries[EI].ByteOffset, getNumBytes(EI)); + } + ArrayRef<std::string> getComments(const Entry &E) const { + size_t EI = getIndex(E); + return makeArrayRef(Comments) + .slice(Entries[EI].CommentOffset, getNumComments(EI)); + } + +private: + size_t getIndex(const List &L) const { + assert(&Lists.front() <= &L && &L <= &Lists.back() && + "Expected valid list"); + return &L - &Lists.front(); + } + size_t getIndex(const Entry &E) const { + assert(&Entries.front() <= &E && &E <= &Entries.back() && + "Expected valid entry"); + return &E - &Entries.front(); + } + size_t getNumEntries(size_t LI) const { + if (LI + 1 == Lists.size()) + return Entries.size() - Lists[LI].EntryOffset; + return Lists[LI + 1].EntryOffset - Lists[LI].EntryOffset; + } + size_t getNumBytes(size_t EI) const { + if (EI + 1 == Entries.size()) + return DWARFBytes.size() - Entries[EI].ByteOffset; + return Entries[EI + 1].ByteOffset - Entries[EI].ByteOffset; + } + size_t getNumComments(size_t EI) const { + if (EI + 1 == Entries.size()) + return Comments.size() - Entries[EI].CommentOffset; + return Entries[EI + 1].CommentOffset - Entries[EI].CommentOffset; + } +}; +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index a71f35e..58b406b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -30,14 +30,14 @@ DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) : Header(8 + (atomList.size() * 4)), HeaderData(atomList), Entries(Allocator) {} -void DwarfAccelTable::AddName(StringRef Name, MCSymbol *StrSym, const DIE *die, +void DwarfAccelTable::AddName(DwarfStringPoolEntryRef Name, const DIE *die, char Flags) { assert(Data.empty() && "Already finalized!"); // If the string is in the list already then add this die to the list // otherwise add a new one. - DataArray &DIEs = Entries[Name]; - assert(!DIEs.StrSym || DIEs.StrSym == StrSym); - DIEs.StrSym = StrSym; + DataArray &DIEs = Entries[Name.getString()]; + assert(!DIEs.Name || DIEs.Name == Name); + DIEs.Name = Name; DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags)); } @@ -54,7 +54,7 @@ void DwarfAccelTable::ComputeBucketCount(void) { // Then compute the bucket size, minimum of 1 bucket. if (num > 1024) Header.bucket_count = num / 4; - if (num > 16) + else if (num > 16) Header.bucket_count = num / 2; else Header.bucket_count = num > 0 ? num : 1; @@ -70,6 +70,7 @@ static bool compareDIEs(const DwarfAccelTable::HashDataContents *A, void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) { // Create the individual hash data outputs. 
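The new DebugLocStream above never stores per-list vectors; each List records only a start offset into flat Entries/DWARFBytes/Comments storage, and a list's length is recovered by subtracting adjacent offsets, with the last list running to the end. That bookkeeping in isolation:

#include <cstddef>
#include <iostream>
#include <vector>

// List i owns entries [Starts[i], Starts[i+1]); the last list runs to the
// end of the flat storage, exactly like getNumEntries() above.
size_t numEntries(const std::vector<size_t> &Starts, size_t TotalEntries,
                  size_t ListIdx) {
  if (ListIdx + 1 == Starts.size())
    return TotalEntries - Starts[ListIdx];
  return Starts[ListIdx + 1] - Starts[ListIdx];
}

int main() {
  std::vector<size_t> Starts = {0, 3, 3, 7}; // four lists; list 1 is empty
  std::cout << numEntries(Starts, 10, 0) << "\n"; // 3
  std::cout << numEntries(Starts, 10, 1) << "\n"; // 0
  std::cout << numEntries(Starts, 10, 3) << "\n"; // 3 (runs to the end)
}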
+ Data.reserve(Entries.size()); for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { @@ -95,33 +96,42 @@ void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) { for (size_t i = 0, e = Data.size(); i < e; ++i) { uint32_t bucket = Data[i]->HashValue % Header.bucket_count; Buckets[bucket].push_back(Data[i]); - Data[i]->Sym = Asm->GetTempSymbol(Prefix, i); + Data[i]->Sym = Asm->createTempSymbol(Prefix); } + + // Sort the contents of the buckets by hash value so that hash + // collisions end up together. Stable sort makes testing easier and + // doesn't cost much more. + for (size_t i = 0; i < Buckets.size(); ++i) + std::stable_sort(Buckets[i].begin(), Buckets[i].end(), + [] (HashData *LHS, HashData *RHS) { + return LHS->HashValue < RHS->HashValue; + }); } // Emits the header for the table via the AsmPrinter. void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { - Asm->OutStreamer.AddComment("Header Magic"); + Asm->OutStreamer->AddComment("Header Magic"); Asm->EmitInt32(Header.magic); - Asm->OutStreamer.AddComment("Header Version"); + Asm->OutStreamer->AddComment("Header Version"); Asm->EmitInt16(Header.version); - Asm->OutStreamer.AddComment("Header Hash Function"); + Asm->OutStreamer->AddComment("Header Hash Function"); Asm->EmitInt16(Header.hash_function); - Asm->OutStreamer.AddComment("Header Bucket Count"); + Asm->OutStreamer->AddComment("Header Bucket Count"); Asm->EmitInt32(Header.bucket_count); - Asm->OutStreamer.AddComment("Header Hash Count"); + Asm->OutStreamer->AddComment("Header Hash Count"); Asm->EmitInt32(Header.hashes_count); - Asm->OutStreamer.AddComment("Header Data Length"); + Asm->OutStreamer->AddComment("Header Data Length"); Asm->EmitInt32(Header.header_data_len); - Asm->OutStreamer.AddComment("HeaderData Die Offset Base"); + Asm->OutStreamer->AddComment("HeaderData Die Offset Base"); Asm->EmitInt32(HeaderData.die_offset_base); - Asm->OutStreamer.AddComment("HeaderData Atom Count"); + Asm->OutStreamer->AddComment("HeaderData Atom Count"); Asm->EmitInt32(HeaderData.Atoms.size()); for (size_t i = 0; i < HeaderData.Atoms.size(); i++) { Atom A = HeaderData.Atoms[i]; - Asm->OutStreamer.AddComment(dwarf::AtomTypeString(A.type)); + Asm->OutStreamer->AddComment(dwarf::AtomTypeString(A.type)); Asm->EmitInt16(A.type); - Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form)); + Asm->OutStreamer->AddComment(dwarf::FormEncodingString(A.form)); Asm->EmitInt16(A.form); } } @@ -131,24 +141,37 @@ void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { unsigned index = 0; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { - Asm->OutStreamer.AddComment("Bucket " + Twine(i)); + Asm->OutStreamer->AddComment("Bucket " + Twine(i)); if (Buckets[i].size() != 0) Asm->EmitInt32(index); else Asm->EmitInt32(UINT32_MAX); - index += Buckets[i].size(); + // Buckets point in the list of hashes, not to the data. Do not + // increment the index multiple times in case of hash collisions. + uint64_t PrevHash = UINT64_MAX; + for (auto *HD : Buckets[i]) { + uint32_t HashValue = HD->HashValue; + if (PrevHash != HashValue) + ++index; + PrevHash = HashValue; + } } } // Walk through the buckets and emit the individual hashes for each // bucket. 
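The one-character ComputeBucketCount fix above (if to else if) is easy to miss: previously a table with more than 1024 unique hashes was first assigned num/4 buckets and then immediately overwritten by the num/2 branch. A sketch of the corrected cascade:

#include <iostream>

unsigned bucketCount(unsigned NumUniqueHashes) {
  // With independent 'if's, the >16 branch would clobber the >1024 result.
  if (NumUniqueHashes > 1024)
    return NumUniqueHashes / 4;
  if (NumUniqueHashes > 16)
    return NumUniqueHashes / 2;
  return NumUniqueHashes > 0 ? NumUniqueHashes : 1; // minimum of 1 bucket
}

int main() {
  std::cout << bucketCount(4096) << "\n"; // 1024, not 2048
  std::cout << bucketCount(100) << "\n";  // 50
  std::cout << bucketCount(0) << "\n";    // 1
}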
void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { + uint64_t PrevHash = UINT64_MAX; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) { - Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i)); - Asm->EmitInt32((*HI)->HashValue); + uint32_t HashValue = (*HI)->HashValue; + if (PrevHash == HashValue) + continue; + Asm->OutStreamer->AddComment("Hash in Bucket " + Twine(i)); + Asm->EmitInt32(HashValue); + PrevHash = HashValue; } } } @@ -157,17 +180,22 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { // element in each bucket. This is done via a symbol subtraction from the // beginning of the section. The non-section symbol will be output later // when we emit the actual data. -void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { +void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) { + uint64_t PrevHash = UINT64_MAX; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) { - Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i)); - MCContext &Context = Asm->OutStreamer.getContext(); + uint32_t HashValue = (*HI)->HashValue; + if (PrevHash == HashValue) + continue; + PrevHash = HashValue; + Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i)); + MCContext &Context = Asm->OutStreamer->getContext(); const MCExpr *Sub = MCBinaryExpr::CreateSub( MCSymbolRefExpr::Create((*HI)->Sym, Context), MCSymbolRefExpr::Create(SecBegin, Context), Context); - Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t)); + Asm->OutStreamer->EmitValue(Sub, sizeof(uint32_t)); } } } @@ -175,18 +203,21 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { // Walk through the buckets and emit the full data for each element in // the bucket. For the string case emit the dies and the various offsets. // Terminate each HashData bucket with 0. -void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D, - MCSymbol *StrSym) { - uint64_t PrevHash = UINT64_MAX; +void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + uint64_t PrevHash = UINT64_MAX; for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) { + // Terminate the previous entry if there is no hash collision + // with the current one. + if (PrevHash != UINT64_MAX && PrevHash != (*HI)->HashValue) + Asm->EmitInt32(0); // Remember to emit the label for our offset. - Asm->OutStreamer.EmitLabel((*HI)->Sym); - Asm->OutStreamer.AddComment((*HI)->Str); - Asm->EmitSectionOffset((*HI)->Data.StrSym, StrSym); - Asm->OutStreamer.AddComment("Num DIEs"); + Asm->OutStreamer->EmitLabel((*HI)->Sym); + Asm->OutStreamer->AddComment((*HI)->Str); + Asm->emitDwarfStringOffset((*HI)->Data.Name); + Asm->OutStreamer->AddComment("Num DIEs"); Asm->EmitInt32((*HI)->Data.Values.size()); for (HashDataContents *HD : (*HI)->Data.Values) { // Emit the DIE offset @@ -200,17 +231,17 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D, Asm->EmitInt8(HD->Flags); } } - // Emit a 0 to terminate the data unless we have a hash collision. - if (PrevHash != (*HI)->HashValue) - Asm->EmitInt32(0); PrevHash = (*HI)->HashValue; } + // Emit the final end marker for the bucket. + if (!Buckets[i].empty()) + Asm->EmitInt32(0); } } // Emit the entire data structure to the output file. 
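The PrevHash bookkeeping introduced above depends on the earlier stable sort: colliding hashes sit next to each other in a bucket, so each distinct hash (and its offset) is emitted exactly once while the colliding DIE data is merged under it. The pattern in isolation:

#include <cstdint>
#include <iostream>
#include <vector>

void emitUniqueHashes(const std::vector<uint32_t> &SortedBucket) {
  uint64_t PrevHash = UINT64_MAX; // sentinel wider than any 32-bit hash
  for (uint32_t HashValue : SortedBucket) {
    if (HashValue == PrevHash)
      continue; // hash collision: merged under the previous entry
    std::cout << "hash 0x" << std::hex << HashValue << std::dec << "\n";
    PrevHash = HashValue;
  }
}

int main() {
  emitUniqueHashes({1, 1, 2, 5, 5, 5}); // emits 1, 2, 5
}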
-void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D, - MCSymbol *StrSym) { +void DwarfAccelTable::emit(AsmPrinter *Asm, const MCSymbol *SecBegin, + DwarfDebug *D) { // Emit the header. EmitHeader(Asm); @@ -221,10 +252,10 @@ void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D, EmitHashes(Asm); // Emit the offsets. - EmitOffsets(Asm, SecBegin); + emitOffsets(Asm, SecBegin); // Emit the hash data. - EmitData(Asm, D, StrSym); + EmitData(Asm, D); } #ifndef NDEBUG diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 4a6085b..4d81441 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -181,9 +181,8 @@ public: private: // String Data struct DataArray { - MCSymbol *StrSym; + DwarfStringPoolEntryRef Name; std::vector<HashDataContents *> Values; - DataArray() : StrSym(nullptr) {} }; friend struct HashData; struct HashData { @@ -201,7 +200,7 @@ private: O << " Hash Value: " << format("0x%x", HashValue) << "\n"; O << " Symbol: "; if (Sym) - Sym->print(O); + O << *Sym; else O << "<none>"; O << "\n"; @@ -215,15 +214,15 @@ private: #endif }; - DwarfAccelTable(const DwarfAccelTable &) LLVM_DELETED_FUNCTION; - void operator=(const DwarfAccelTable &) LLVM_DELETED_FUNCTION; + DwarfAccelTable(const DwarfAccelTable &) = delete; + void operator=(const DwarfAccelTable &) = delete; // Internal Functions void EmitHeader(AsmPrinter *); void EmitBuckets(AsmPrinter *); void EmitHashes(AsmPrinter *); - void EmitOffsets(AsmPrinter *, MCSymbol *); - void EmitData(AsmPrinter *, DwarfDebug *D, MCSymbol *StrSym); + void emitOffsets(AsmPrinter *, const MCSymbol *); + void EmitData(AsmPrinter *, DwarfDebug *D); // Allocator for HashData and HashDataContents. BumpPtrAllocator Allocator; @@ -245,10 +244,9 @@ private: // Public Implementation public: DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); - void AddName(StringRef Name, MCSymbol *StrSym, const DIE *Die, - char Flags = 0); + void AddName(DwarfStringPoolEntryRef Name, const DIE *Die, char Flags = 0); void FinalizeTable(AsmPrinter *, StringRef); - void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *, MCSymbol *StrSym); + void emit(AsmPrinter *, const MCSymbol *, DwarfDebug *); #ifndef NDEBUG void print(raw_ostream &O); void dump() { print(dbgs()); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 0dc52da..0bc873e 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -39,9 +39,24 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A) + : EHStreamer(A), shouldEmitCFI(false) {} + +void DwarfCFIExceptionBase::markFunctionEnd() { + if (shouldEmitCFI) + Asm->OutStreamer->EmitCFIEndProc(); + + if (MMI->getLandingPads().empty()) + return; + + // Map all labels and get rid of any dead landing pads. 
+ MMI->TidyLandingPads(); +} + DwarfCFIException::DwarfCFIException(AsmPrinter *A) - : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false), - shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {} + : DwarfCFIExceptionBase(A), shouldEmitPersonality(false), + shouldEmitLSDA(false), shouldEmitMoves(false), + moveTypeModule(AsmPrinter::CFI_M_None) {} DwarfCFIException::~DwarfCFIException() {} @@ -49,9 +64,10 @@ DwarfCFIException::~DwarfCFIException() {} /// content. void DwarfCFIException::endModule() { if (moveTypeModule == AsmPrinter::CFI_M_Debug) - Asm->OutStreamer.EmitCFISections(false, true); + Asm->OutStreamer->EmitCFISections(false, true); - if (!Asm->MAI->usesItaniumLSDAForExceptions()) + // SjLj uses this pass and it doesn't need this info. + if (!Asm->MAI->usesCFIForEH()) return; const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); @@ -67,12 +83,10 @@ void DwarfCFIException::endModule() { if (!Personalities[i]) continue; MCSymbol *Sym = Asm->getSymbol(Personalities[i]); - TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym); + TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->TM, Sym); } } -/// beginFunction - Gather pre-function exception information. Assumes it's -/// being emitted immediately after the function entry point. void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; @@ -90,7 +104,7 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; + const Function *Per = MMI->getPersonality(); shouldEmitPersonality = hasLandingPads && PerEncoding != dwarf::DW_EH_PE_omit && Per; @@ -99,10 +113,11 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; - if (!shouldEmitPersonality && !shouldEmitMoves) + shouldEmitCFI = shouldEmitPersonality || shouldEmitMoves; + if (!shouldEmitCFI) return; - Asm->OutStreamer.EmitCFIStartProc(/*IsSimple=*/false); + Asm->OutStreamer->EmitCFIStartProc(/*IsSimple=*/false); // Indicate personality routine, if any. if (!shouldEmitPersonality) @@ -110,45 +125,20 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); - Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding); - - MCSymbol *EHBegin = - Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); - if (Asm->MAI->useAssignmentForEHBegin()) { - MCContext &Ctx = Asm->OutContext; - MCSymbol *CurPos = Ctx.CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(CurPos); - Asm->OutStreamer.EmitAssignment(EHBegin, - MCSymbolRefExpr::Create(CurPos, Ctx)); - } else { - Asm->OutStreamer.EmitLabel(EHBegin); - } + Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding); // Provide LSDA information. if (!shouldEmitLSDA) return; - Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", - Asm->getFunctionNumber()), - LSDAEncoding); + Asm->OutStreamer->EmitCFILsda(Asm->getCurExceptionSym(), LSDAEncoding); } /// endFunction - Gather and emit post-function exception information. 
/// void DwarfCFIException::endFunction(const MachineFunction *) { - if (!shouldEmitPersonality && !shouldEmitMoves) - return; - - Asm->OutStreamer.EmitCFIEndProc(); - if (!shouldEmitPersonality) return; - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", - Asm->getFunctionNumber())); - - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); - emitExceptionTable(); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index b4dba9c..c10e703 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1,6 +1,7 @@ #include "DwarfCompileUnit.h" #include "DwarfExpression.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" @@ -15,11 +16,11 @@ namespace llvm { -DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node, +DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), - Skeleton(nullptr), LabelBegin(nullptr), BaseAddress(nullptr) { + Skeleton(nullptr), BaseAddress(nullptr) { insertDIE(Node, &getUnitDie()); } @@ -63,9 +64,9 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, // FIXME: add a better feature test than hasRawTextSupport. Even better, // extend .file to support this. - return Asm->OutStreamer.EmitDwarfFileDirective( + return Asm->OutStreamer->EmitDwarfFileDirective( 0, DirName, FileName, - Asm->OutStreamer.hasRawTextSupport() ? 0 : getUniqueID()); + Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID()); } // Return const expression if value is a GEP to access merged global @@ -96,56 +97,57 @@ static const ConstantExpr *getMergedGlobalExpr(const Value *V) { } /// getOrCreateGlobalVariableDIE - get or create global variable DIE. -DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { +DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( + const DIGlobalVariable *GV) { // Check for pre-existence. if (DIE *Die = getDIE(GV)) return Die; - assert(GV.isGlobalVariable()); + assert(GV); - DIScope GVContext = GV.getContext(); - DIType GTy = DD->resolve(GV.getType()); + auto *GVContext = GV->getScope(); + auto *GTy = DD->resolve(GV->getType()); // Construct the context before querying for the existence of the DIE in // case such construction creates the DIE. DIE *ContextDIE = getOrCreateContextDIE(GVContext); // Add to map. - DIE *VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV); - DIScope DeclContext; - - if (DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration()) { - DeclContext = resolve(SDMDecl.getContext()); - assert(SDMDecl.isStaticMember() && "Expected static member decl"); - assert(GV.isDefinition()); + DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV); + DIScope *DeclContext; + if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) { + DeclContext = resolve(SDMDecl->getScope()); + assert(SDMDecl->isStaticMember() && "Expected static member decl"); + assert(GV->isDefinition()); // We need the declaration DIE that is in the static member's class. 
DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl); addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE); } else { - DeclContext = GV.getContext(); + DeclContext = GV->getScope(); // Add name and type. - addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); + addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName()); addType(*VariableDIE, GTy); // Add scoping info. - if (!GV.isLocalToUnit()) + if (!GV->isLocalToUnit()) addFlag(*VariableDIE, dwarf::DW_AT_external); // Add line number info. addSourceLine(*VariableDIE, GV); } - if (!GV.isDefinition()) + if (!GV->isDefinition()) addFlag(*VariableDIE, dwarf::DW_AT_declaration); + else + addGlobalName(GV->getName(), *VariableDIE, DeclContext); // Add location. bool addToAccelTable = false; - bool isGlobalVariable = GV.getGlobal() != nullptr; - if (isGlobalVariable) { + if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) { addToAccelTable = true; DIELoc *Loc = new (DIEValueAllocator) DIELoc(); - const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal()); - if (GV.getGlobal()->isThreadLocal()) { + const MCSymbol *Sym = Asm->getSymbol(Global); + if (Global->isThreadLocal()) { // FIXME: Make this work with -gsplit-dwarf. unsigned PointerSize = Asm->getDataLayout().getPointerSize(); assert((PointerSize == 4 || PointerSize == 8) && @@ -164,28 +166,21 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { addUInt(*Loc, dwarf::DW_FORM_udata, DD->getAddressPool().getIndex(Sym, /* TLS */ true)); } - // 3) followed by a custom OP to make the debugger do a TLS lookup. - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); + // 3) followed by an OP to make the debugger do a TLS lookup. + addUInt(*Loc, dwarf::DW_FORM_data1, + DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address + : dwarf::DW_OP_form_tls_address); } else { DD->addArangeLabel(SymbolCU(this, Sym)); addOpAddress(*Loc, Sym); } addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); - // Add the linkage name. - StringRef LinkageName = GV.getLinkageName(); - if (!LinkageName.empty()) - // From DWARF4: DIEs to which DW_AT_linkage_name may apply include: - // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and - // TAG_variable. - addString(*VariableDIE, - DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name - : dwarf::DW_AT_MIPS_linkage_name, - GlobalValue::getRealLinkageName(LinkageName)); + addLinkageName(*VariableDIE, GV->getLinkageName()); } else if (const ConstantInt *CI = - dyn_cast_or_null<ConstantInt>(GV.getConstant())) { + dyn_cast_or_null<ConstantInt>(GV->getVariable())) { addConstantValue(*VariableDIE, CI, GTy); - } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV.getConstant())) { + } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) { addToAccelTable = true; // GV is a merged global. DIELoc *Loc = new (DIEValueAllocator) DIELoc(); @@ -202,15 +197,14 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { } if (addToAccelTable) { - DD->addAccelName(GV.getName(), *VariableDIE); + DD->addAccelName(GV->getName(), *VariableDIE); // If the linkage name is different than the name, go ahead and output // that as well into the name table. 
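For thread-local globals, the location expression above now ends in either the GNU vendor opcode or the standard DWARF 3 one, depending on what the consumer expects. A sketch of just that choice; the opcode values are taken from the DWARF 3 specification and the GNU extension range:

#include <cstdint>
#include <cstdio>

constexpr uint8_t DW_OP_form_tls_address = 0x9b;     // DWARF 3 standard op
constexpr uint8_t DW_OP_GNU_push_tls_address = 0xe0; // GNU vendor extension

uint8_t tlsLookupOp(bool UseGNUTLSOpcode) {
  return UseGNUTLSOpcode ? DW_OP_GNU_push_tls_address
                         : DW_OP_form_tls_address;
}

int main() {
  std::printf("0x%x\n", tlsLookupOp(true));  // 0xe0
  std::printf("0x%x\n", tlsLookupOp(false)); // 0x9b
}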
- if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) - DD->addAccelName(GV.getLinkageName(), *VariableDIE); + if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName()) + DD->addAccelName(GV->getLinkageName(), *VariableDIE); } - addGlobalName(GV.getName(), *VariableDIE, DeclContext); return VariableDIE; } @@ -243,10 +237,10 @@ void DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, addSectionDelta(Die, Attribute, Label, Sec); } -void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) { +void DwarfCompileUnit::initStmtList() { // Define start line table label for each Compile Unit. MCSymbol *LineTableStartSym = - Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID()); + Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID()); stmtListIndex = UnitDie.getValues().size(); @@ -255,8 +249,9 @@ void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) { // left in the skeleton CU and so not included. // The line table entries are not always emitted in assembly, so it // is not okay to use line_table_start here. + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym, - DwarfLineSectionSym); + TLOF.getDwarfLineSection()->getBeginSymbol()); } void DwarfCompileUnit::applyStmtList(DIE &D) { @@ -282,18 +277,17 @@ void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin, // Find DIE for the given subprogram and attach appropriate DW_AT_low_pc // and DW_AT_high_pc attributes. If there are global variables in this // scope then create and insert DIEs for these variables. -DIE &DwarfCompileUnit::updateSubprogramScopeDIE(DISubprogram SP) { +DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes()); - attachLowHighPC(*SPDie, DD->getFunctionBeginSym(), DD->getFunctionEndSym()); + attachLowHighPC(*SPDie, Asm->getFunctionBegin(), Asm->getFunctionEnd()); if (!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim( *DD->getCurrentFunction())) addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr); // Only include DW_AT_frame_base in full debug info if (!includeMinimalInlineScopes()) { - const TargetRegisterInfo *RI = - Asm->TM.getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); if (RI->isPhysicalRegister(Location.getReg())) addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); @@ -312,9 +306,9 @@ void DwarfCompileUnit::constructScopeDIE( if (!Scope || !Scope->getScopeNode()) return; - DIScope DS(Scope->getScopeNode()); + auto *DS = Scope->getScopeNode(); - assert((Scope->getInlinedAt() || !DS.isSubprogram()) && + assert((Scope->getInlinedAt() || !isa<DISubprogram>(DS)) && "Only handle inlined subprograms here, use " "constructSubprogramScopeDIE for non-inlined " "subprograms"); @@ -325,7 +319,7 @@ void DwarfCompileUnit::constructScopeDIE( // avoid creating un-used children then removing them later when we find out // the scope DIE is null. std::unique_ptr<DIE> ScopeDIE; - if (Scope->getParent() && DS.isSubprogram()) { + if (Scope->getParent() && isa<DISubprogram>(DS)) { ScopeDIE = constructInlinedScopeDIE(Scope); if (!ScopeDIE) return; @@ -347,7 +341,7 @@ void DwarfCompileUnit::constructScopeDIE( // There is no need to emit empty lexical block DIE. 
for (const auto &E : DD->findImportedEntitiesForScope(DS)) Children.push_back( - constructImportedEntityDIE(DIImportedEntity(E.second))); + constructImportedEntityDIE(cast<DIImportedEntity>(E.second))); } // If there are only other scopes as children, put them directly in the @@ -379,13 +373,14 @@ void DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range) { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + // Emit offset in .debug_range as a relocatable label. emitDIE will handle // emitting it appropriately. - auto *RangeSectionSym = DD->getRangeSectionSym(); + const MCSymbol *RangeSectionSym = + TLOF.getDwarfRangesSection()->getBeginSymbol(); - RangeSpanList List( - Asm->GetTempSymbol("debug_ranges", DD->getNextRangeNumber()), - std::move(Range)); + RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range)); // Under fission, ranges are specified by constant offsets relative to the // CU's DW_AT_GNU_ranges_base. @@ -424,8 +419,8 @@ void DwarfCompileUnit::attachRangesOrLowHighPC( std::unique_ptr<DIE> DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { assert(Scope->getScopeNode()); - DIScope DS(Scope->getScopeNode()); - DISubprogram InlinedSP = getDISubprogram(DS); + auto *DS = Scope->getScopeNode(); + auto *InlinedSP = getDISubprogram(DS); // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. DIE *OriginDIE = DU->getAbstractSPDies()[InlinedSP]; @@ -437,10 +432,10 @@ DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges()); // Add the call site information to the DIE. - DILocation DL(Scope->getInlinedAt()); + const DILocation *IA = Scope->getInlinedAt(); addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, - getOrCreateSourceID(DL.getFilename(), DL.getDirectory())); - addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber()); + getOrCreateSourceID(IA->getFilename(), IA->getDirectory())); + addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine()); // Add name to the name table, we do this here because we're guaranteed // to have concrete versions of our DW_TAG_inlined_subprogram nodes. @@ -486,7 +481,7 @@ DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, // Add variable address. - unsigned Offset = DV.getDotDebugLocOffset(); + unsigned Offset = DV.getDebugLocListIndex(); if (Offset != ~0U) { addLocationList(*VariableDie, dwarf::DW_AT_location, Offset); return VariableDie; @@ -516,15 +511,23 @@ DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, } // .. else use frame index. 
- int FI = DV.getFrameIndex(); - if (FI != ~0) { + if (DV.getFrameIndex().back() == ~0) + return VariableDie; + + auto Expr = DV.getExpression().begin(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + for (auto FI : DV.getFrameIndex()) { unsigned FrameReg = 0; - const TargetFrameLowering *TFI = - Asm->TM.getSubtargetImpl()->getFrameLowering(); + const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - MachineLocation Location(FrameReg, Offset); - addVariableAddress(DV, *VariableDie, Location); + assert(Expr != DV.getExpression().end() && + "Wrong number of expressions"); + DwarfExpr.AddMachineRegIndirect(FrameReg, Offset); + DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end()); + ++Expr; } + addBlock(*VariableDie, dwarf::DW_AT_location, Loc); return VariableDie; } @@ -560,16 +563,14 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) { assert(Scope && Scope->getScopeNode()); assert(!Scope->getInlinedAt()); assert(!Scope->isAbstractScope()); - DISubprogram Sub(Scope->getScopeNode()); - - assert(Sub.isSubprogram()); + auto *Sub = cast<DISubprogram>(Scope->getScopeNode()); DD->getProcessedSPNodes().insert(Sub); DIE &ScopeDIE = updateSubprogramScopeDIE(Sub); // If this is a variadic function, add an unspecified parameter. - DITypeArray FnArgs = Sub.getType().getTypeArray(); + DITypeRefArray FnArgs = Sub->getType()->getTypeArray(); // Collect lexical scope children first. // ObjectPointer might be a local (non-argument) local variable if it's a @@ -580,8 +581,7 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) { // If we have a single element of null, it is a function that returns void. // If we have more than one elements and the last one is null, it is a // variadic function. - if (FnArgs.getNumElements() > 1 && - !FnArgs.getElement(FnArgs.getNumElements() - 1) && + if (FnArgs.size() > 1 && !FnArgs[FnArgs.size() - 1] && !includeMinimalInlineScopes()) ScopeDIE.addChild(make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters)); } @@ -605,26 +605,25 @@ DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { if (AbsDef) return; - DISubprogram SP(Scope->getScopeNode()); + auto *SP = cast<DISubprogram>(Scope->getScopeNode()); DIE *ContextDIE; if (includeMinimalInlineScopes()) ContextDIE = &getUnitDie(); // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with - // the important distinction that the DIDescriptor is not associated with the - // DIE (since the DIDescriptor will be associated with the concrete DIE, if + // the important distinction that the debug node is not associated with the + // DIE (since the debug node will be associated with the concrete DIE, if // any). It could be refactored to some common utility function. - else if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + else if (auto *SPDecl = SP->getDeclaration()) { ContextDIE = &getUnitDie(); getOrCreateSubprogramDIE(SPDecl); } else - ContextDIE = getOrCreateContextDIE(resolve(SP.getContext())); + ContextDIE = getOrCreateContextDIE(resolve(SP->getScope())); - // Passing null as the associated DIDescriptor because the abstract definition + // Passing null as the associated node because the abstract definition // shouldn't be found by lookup. 
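constructVariableDIEImpl above now walks a list of frame indices in step with a list of expressions, one pair per piece of a variable split across stack slots, asserting that the two sequences stay in step. A standalone sketch of that paired walk, with hypothetical types:

#include <cassert>
#include <iostream>
#include <vector>

struct Expr { const char *Desc; };

void lowerFramePieces(const std::vector<int> &FrameIndices,
                      const std::vector<Expr> &Exprs) {
  auto E = Exprs.begin();
  for (int FI : FrameIndices) {
    assert(E != Exprs.end() && "Wrong number of expressions");
    // Real code emits a register-relative op for the slot, then the piece
    // expression; here we just trace the pairing.
    std::cout << "frame index " << FI << ": " << E->Desc << "\n";
    ++E;
  }
}

int main() {
  lowerFramePieces({2, 3}, {{"piece 0..31"}, {"piece 32..63"}});
}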
- AbsDef = - &createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, DIDescriptor()); + AbsDef = &createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr); applySubprogramAttributesToDefinition(SP, *AbsDef); if (!includeMinimalInlineScopes()) @@ -634,36 +633,33 @@ DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { } std::unique_ptr<DIE> -DwarfCompileUnit::constructImportedEntityDIE(const DIImportedEntity &Module) { - assert(Module.Verify() && - "Use one of the MDNode * overloads to handle invalid metadata"); - std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module.getTag()); +DwarfCompileUnit::constructImportedEntityDIE(const DIImportedEntity *Module) { + std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module->getTag()); insertDIE(Module, IMDie.get()); DIE *EntityDie; - DIDescriptor Entity = resolve(Module.getEntity()); - if (Entity.isNameSpace()) - EntityDie = getOrCreateNameSpace(DINameSpace(Entity)); - else if (Entity.isSubprogram()) - EntityDie = getOrCreateSubprogramDIE(DISubprogram(Entity)); - else if (Entity.isType()) - EntityDie = getOrCreateTypeDIE(DIType(Entity)); - else if (Entity.isGlobalVariable()) - EntityDie = getOrCreateGlobalVariableDIE(DIGlobalVariable(Entity)); + auto *Entity = resolve(Module->getEntity()); + if (auto *NS = dyn_cast<DINamespace>(Entity)) + EntityDie = getOrCreateNameSpace(NS); + else if (auto *SP = dyn_cast<DISubprogram>(Entity)) + EntityDie = getOrCreateSubprogramDIE(SP); + else if (auto *T = dyn_cast<DIType>(Entity)) + EntityDie = getOrCreateTypeDIE(T); + else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity)) + EntityDie = getOrCreateGlobalVariableDIE(GV); else EntityDie = getDIE(Entity); assert(EntityDie); - addSourceLine(*IMDie, Module.getLineNumber(), - Module.getContext().getFilename(), - Module.getContext().getDirectory()); + addSourceLine(*IMDie, Module->getLine(), Module->getScope()->getFilename(), + Module->getScope()->getDirectory()); addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); - StringRef Name = Module.getName(); + StringRef Name = Module->getName(); if (!Name.empty()) addString(*IMDie, dwarf::DW_AT_name, Name); return IMDie; } -void DwarfCompileUnit::finishSubprogramDefinition(DISubprogram SP) { +void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { DIE *D = getDIE(SP); if (DIE *AbsSPDIE = DU->getAbstractSPDies().lookup(SP)) { if (D) @@ -680,39 +676,39 @@ void DwarfCompileUnit::finishSubprogramDefinition(DISubprogram SP) { applySubprogramAttributesToDefinition(SP, *D); } } -void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) { - assert(SP.isSubprogram() && "CU's subprogram list contains a non-subprogram"); - assert(SP.isDefinition() && +void DwarfCompileUnit::collectDeadVariables(const DISubprogram *SP) { + assert(SP && "CU's subprogram list contains a non-subprogram"); + assert(SP->isDefinition() && "CU's subprogram list contains a subprogram declaration"); - DIArray Variables = SP.getVariables(); - if (Variables.getNumElements() == 0) + auto Variables = SP->getVariables(); + if (Variables.size() == 0) return; DIE *SPDIE = DU->getAbstractSPDies().lookup(SP); if (!SPDIE) SPDIE = getDIE(SP); assert(SPDIE); - for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { - DIVariable DV(Variables.getElement(vi)); - assert(DV.isVariable()); - DbgVariable NewVar(DV, DIExpression(nullptr), DD); + for (const DILocalVariable *DV : Variables) { + DbgVariable NewVar(DV, /* IA */ nullptr, /* Expr */ nullptr, DD); auto VariableDie = 
constructVariableDIE(NewVar); applyVariableAttributes(NewVar, *VariableDie); SPDIE->addChild(std::move(VariableDie)); } } -void DwarfCompileUnit::emitHeader(const MCSymbol *ASectionSym) const { +void DwarfCompileUnit::emitHeader(bool UseOffsets) { // Don't bother labeling the .dwo unit, as its offset isn't used. - if (!Skeleton) - Asm->OutStreamer.EmitLabel(LabelBegin); + if (!Skeleton) { + LabelBegin = Asm->createTempSymbol("cu_begin"); + Asm->OutStreamer->EmitLabel(LabelBegin); + } - DwarfUnit::emitHeader(ASectionSym); + DwarfUnit::emitHeader(UseOffsets); } /// addGlobalName - Add a new global name to the compile unit. void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die, - DIScope Context) { + const DIScope *Context) { if (includeMinimalInlineScopes()) return; std::string FullName = getParentContextString(Context) + Name.str(); @@ -720,11 +716,11 @@ void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die, } /// Add a new global type to the unit. -void DwarfCompileUnit::addGlobalType(DIType Ty, const DIE &Die, - DIScope Context) { +void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die, + const DIScope *Context) { if (includeMinimalInlineScopes()) return; - std::string FullName = getParentContextString(Context) + Ty.getName().str(); + std::string FullName = getParentContextString(Context) + Ty->getName().str(); GlobalTypes[FullName] = &Die; } @@ -737,18 +733,16 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, else if (DV.isBlockByrefVariable()) addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); else - addAddress(Die, dwarf::DW_AT_location, Location, - DV.getVariable().isIndirect()); + addAddress(Die, dwarf::DW_AT_location, Location); } /// Add an address attribute to a die based on the location provided. void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, - const MachineLocation &Location, - bool Indirect) { + const MachineLocation &Location) { DIELoc *Loc = new (DIEValueAllocator) DIELoc(); bool validReg; - if (Location.isReg() && !Indirect) + if (Location.isReg()) validReg = addRegisterOpPiece(*Loc, Location.getReg()); else validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); @@ -756,9 +750,6 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, if (!validReg) return; - if (!Location.isReg() && Indirect) - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - // Now attach the location information to the DIE. 
addBlock(Die, Attribute, Loc); } @@ -772,22 +763,20 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, const MachineLocation &Location) { DIELoc *Loc = new (DIEValueAllocator) DIELoc(); DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); - DIExpression Expr = DV.getExpression(); + assert(DV.getExpression().size() == 1); + const DIExpression *Expr = DV.getExpression().back(); + bool ValidReg; if (Location.getOffset()) { - if (DwarfExpr.AddMachineRegIndirect(Location.getReg(), - Location.getOffset())) { - DwarfExpr.AddExpression(Expr); - assert(!DV.getVariable().isIndirect() - && "double indirection not handled"); - } - } else { - if (DwarfExpr.AddMachineRegExpression(Expr, Location.getReg())) - if (DV.getVariable().isIndirect()) - DwarfExpr.EmitOp(dwarf::DW_OP_deref); - } + ValidReg = DwarfExpr.AddMachineRegIndirect(Location.getReg(), + Location.getOffset()); + if (ValidReg) + DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end()); + } else + ValidReg = DwarfExpr.AddMachineRegExpression(Expr, Location.getReg()); // Now attach the location information to the DIE. - addBlock(Die, Attribute, Loc); + if (ValidReg) + addBlock(Die, Attribute, Loc); } /// Add a Dwarf loclistptr attribute data and value. @@ -817,12 +806,12 @@ void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form, Die.addValue((dwarf::Attribute)0, Form, Value); } -void DwarfCompileUnit::applySubprogramAttributesToDefinition(DISubprogram SP, - DIE &SPDie) { - DISubprogram SPDecl = SP.getFunctionDeclaration(); - DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext()); +void DwarfCompileUnit::applySubprogramAttributesToDefinition( + const DISubprogram *SP, DIE &SPDie) { + auto *SPDecl = SP->getDeclaration(); + auto *Context = resolve(SPDecl ? SPDecl->getScope() : SP->getScope()); applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes()); - addGlobalName(SP.getName(), SPDie, Context); + addGlobalName(SP->getName(), SPDie, Context); } bool DwarfCompileUnit::isDwoUnit() const { @@ -830,7 +819,7 @@ bool DwarfCompileUnit::isDwoUnit() const { } bool DwarfCompileUnit::includeMinimalInlineScopes() const { - return getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly || + return getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly || (DD->useSplitDwarf() && !Skeleton); } } // end llvm namespace diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 91164bc..50e4a54e 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -36,9 +36,6 @@ class DwarfCompileUnit : public DwarfUnit { /// Skeleton unit associated with this unit. DwarfCompileUnit *Skeleton; - /// A label at the start of the non-dwo section related to this unit. - MCSymbol *SectionSym; - /// The start of the unit within its section. MCSymbol *LabelBegin; @@ -69,20 +66,20 @@ class DwarfCompileUnit : public DwarfUnit { bool includeMinimalInlineScopes() const; public: - DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A, + DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); DwarfCompileUnit *getSkeleton() const { return Skeleton; } - void initStmtList(MCSymbol *DwarfLineSectionSym); + void initStmtList(); /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. void applyStmtList(DIE &D); /// getOrCreateGlobalVariableDIE - get or create global variable DIE. 
- DIE *getOrCreateGlobalVariableDIE(DIGlobalVariable GV); + DIE *getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV); /// addLabelAddress - Add a dwarf label attribute data and value using /// either DW_FORM_addr or DW_FORM_GNU_addr_index. @@ -116,7 +113,7 @@ public: /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global /// variables in this scope then create and insert DIEs for these /// variables. - DIE &updateSubprogramScopeDIE(DISubprogram SP); + DIE &updateSubprogramScopeDIE(const DISubprogram *SP); void constructScopeDIE(LexicalScope *Scope, SmallVectorImpl<std::unique_ptr<DIE>> &FinalChildren); @@ -159,31 +156,18 @@ public: /// \brief Construct import_module DIE. std::unique_ptr<DIE> - constructImportedEntityDIE(const DIImportedEntity &Module); + constructImportedEntityDIE(const DIImportedEntity *Module); - void finishSubprogramDefinition(DISubprogram SP); + void finishSubprogramDefinition(const DISubprogram *SP); - void collectDeadVariables(DISubprogram SP); + void collectDeadVariables(const DISubprogram *SP); /// Set the skeleton unit associated with this unit. void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; } - MCSymbol *getSectionSym() const { + const MCSymbol *getSectionSym() const { assert(Section); - return SectionSym; - } - - /// Pass in the SectionSym even though we could recreate it in every compile - /// unit (type units will have actually distinct symbols once they're in - /// comdat sections). - void initSection(const MCSection *Section, MCSymbol *SectionSym) { - DwarfUnit::initSection(Section); - this->SectionSym = SectionSym; - - // Don't bother labeling the .dwo unit, as its offset isn't used. - if (!Skeleton) - LabelBegin = - Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); + return Section->getBeginSymbol(); } unsigned getLength() { @@ -191,7 +175,7 @@ public: getHeaderSize() + UnitDie.getSize(); } - void emitHeader(const MCSymbol *ASectionSym) const override; + void emitHeader(bool UseOffsets) override; MCSymbol *getLabelBegin() const { assert(Section); @@ -199,10 +183,11 @@ public: } /// Add a new global name to the compile unit. - void addGlobalName(StringRef Name, DIE &Die, DIScope Context) override; + void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) override; /// Add a new global type to the compile unit. - void addGlobalType(DIType Ty, const DIE &Die, DIScope Context) override; + void addGlobalType(const DIType *Ty, const DIE &Die, + const DIScope *Context) override; const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; } const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; } @@ -213,7 +198,7 @@ public: MachineLocation Location); /// Add an address attribute to a die based on the location provided. void addAddress(DIE &Die, dwarf::Attribute Attribute, - const MachineLocation &Location, bool Indirect = false); + const MachineLocation &Location); /// Start with the address based on the location provided, and generate the /// DWARF information necessary to find the actual variable (navigating the @@ -230,7 +215,8 @@ public: /// Add a Dwarf expression attribute data and value. void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr); - void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie); + void applySubprogramAttributesToDefinition(const DISubprogram *SP, + DIE &SPDie); /// getRangeLists - Get the vector of range lists. 
const SmallVectorImpl<RangeSpanList> &getRangeLists() const { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index a587b46..105ff6c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -14,6 +14,7 @@ #include "DwarfDebug.h" #include "ByteStreamer.h" #include "DIEHash.h" +#include "DebugLocEntry.h" #include "DwarfCompileUnit.h" #include "DwarfExpression.h" #include "DwarfUnit.h" @@ -45,6 +46,7 @@ #include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -105,24 +107,45 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, static const char *const DWARFGroupName = "DWARF Emission"; static const char *const DbgTimerName = "DWARF Debug Writer"; +void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) { + BS.EmitInt8( + Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op) + : dwarf::OperationEncodingString(Op)); +} + +void DebugLocDwarfExpression::EmitSigned(int64_t Value) { + BS.EmitSLEB128(Value, Twine(Value)); +} + +void DebugLocDwarfExpression::EmitUnsigned(uint64_t Value) { + BS.EmitULEB128(Value, Twine(Value)); +} + +bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) { + // This information is not available while emitting .debug_loc entries. + return false; +} + //===----------------------------------------------------------------------===// /// resolve - Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. -template <typename T> T DbgVariable::resolve(DIRef<T> Ref) const { +template <typename T> T *DbgVariable::resolve(TypedDINodeRef<T> Ref) const { return DD->resolve(Ref); } bool DbgVariable::isBlockByrefVariable() const { - assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Var.isBlockByrefVariable(DD->getTypeIdentifierMap()); + assert(Var && "Invalid complex DbgVariable!"); + return Var->getType() + .resolve(DD->getTypeIdentifierMap()) + ->isBlockByrefStruct(); } -DIType DbgVariable::getType() const { - DIType Ty = Var.getType().resolve(DD->getTypeIdentifierMap()); +const DIType *DbgVariable::getType() const { + DIType *Ty = Var->getType().resolve(DD->getTypeIdentifierMap()); // FIXME: isBlockByrefVariable should be reformulated in terms of complex // addresses instead. - if (Var.isBlockByrefVariable(DD->getTypeIdentifierMap())) { + if (Ty->isBlockByrefStruct()) { /* Byref variables, in Blocks, are declared by the programmer as "SomeType VarName;", but the compiler creates a __Block_byref_x_VarName struct, and gives the variable VarName @@ -147,17 +170,17 @@ DIType DbgVariable::getType() const { have a DW_AT_location that tells the debugger how to unwind through the pointers and __Block_byref_x_VarName struct to find the actual value of the variable. The function addBlockByrefType does this. 
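     As a concrete sketch of the struct described above (field names follow
     the Blocks ABI; the optional helper fields vary, so this layout is
     illustrative rather than something this patch defines):

         struct __Block_byref_x_VarName {
           void *__isa;
           struct __Block_byref_x_VarName *__forwarding;
           int __flags;
           int __size;
           SomeType VarName;   // the field whose type getType() recovers
         };

     The debugger chases __forwarding (which may point at the struct itself
     or at a heap copy) before it can read VarName.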
*/ - DIType subType = Ty; - uint16_t tag = Ty.getTag(); + DIType *subType = Ty; + uint16_t tag = Ty->getTag(); if (tag == dwarf::DW_TAG_pointer_type) - subType = resolve(DIDerivedType(Ty).getTypeDerivedFrom()); + subType = resolve(cast<DIDerivedType>(Ty)->getBaseType()); - DIArray Elements = DICompositeType(subType).getElements(); - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDerivedType DT(Elements.getElement(i)); - if (getName() == DT.getName()) - return (resolve(DT.getTypeDerivedFrom())); + auto Elements = cast<DICompositeTypeBase>(subType)->getElements(); + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + auto *DT = cast<DIDerivedTypeBase>(Elements[i]); + if (getName() == DT->getName()) + return resolve(DT->getBaseType()); } } return Ty; @@ -169,11 +192,12 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = { DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) - : Asm(A), MMI(Asm->MMI), PrevLabel(nullptr), GlobalRangeCount(0), - InfoHolder(A, *this, "info_string", DIEValueAllocator), + : Asm(A), MMI(Asm->MMI), DebugLocs(A->OutStreamer->isVerboseAsm()), + PrevLabel(nullptr), InfoHolder(A, "info_string", DIEValueAllocator), UsedNonDefaultText(false), - SkeletonHolder(A, *this, "skel_string", DIEValueAllocator), + SkeletonHolder(A, "skel_string", DIEValueAllocator), IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()), + IsPS4(Triple(A->getTargetTriple()).isPS4()), AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)), AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, @@ -182,17 +206,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) dwarf::DW_FORM_data4)), AccelTypes(TypeAtoms) { - DwarfInfoSectionSym = DwarfAbbrevSectionSym = DwarfStrSectionSym = nullptr; - DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = nullptr; - DwarfLineSectionSym = nullptr; - DwarfAddrSectionSym = nullptr; - DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = nullptr; - FunctionBeginSym = FunctionEndSym = nullptr; CurFn = nullptr; CurMI = nullptr; // Turn on accelerator tables for Darwin by default, pubnames by - // default for non-Darwin, and handle split dwarf. + // default for non-Darwin/PS4, and handle split dwarf. if (DwarfAccelTables == Default) HasDwarfAccelTables = IsDarwin; else @@ -204,7 +222,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) HasSplitDwarf = SplitDwarf == Enable; if (DwarfPubSections == Default) - HasDwarfPubSections = !IsDarwin; + HasDwarfPubSections = !IsDarwin && !IsPS4; else HasDwarfPubSections = DwarfPubSections == Enable; @@ -212,7 +230,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber : MMI->getModule()->getDwarfVersion(); - Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion); + // Darwin and PS4 use the standard TLS opcode (defined in DWARF 3). + // Everybody else uses GNU's. + UseGNUTLSOpcode = !(IsDarwin || IsPS4) || DwarfVersion < 3; + + Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion); { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); @@ -223,19 +245,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) // Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h. DwarfDebug::~DwarfDebug() { } -// Switch to the specified MCSection and emit an assembler -// temporary label to it if SymbolStem is specified. 
-static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section, - const char *SymbolStem = nullptr) { - Asm->OutStreamer.SwitchSection(Section); - if (!SymbolStem) - return nullptr; - - MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); - Asm->OutStreamer.EmitLabel(TmpSym); - return TmpSym; -} - static bool isObjCClass(StringRef Name) { return Name.startswith("+") || Name.startswith("-"); } @@ -264,37 +273,30 @@ static StringRef getObjCMethodName(StringRef In) { return In.slice(In.find(' ') + 1, In.find(']')); } -// Helper for sorting sections into a stable output order. -static bool SectionSort(const MCSection *A, const MCSection *B) { - std::string LA = (A ? A->getLabelBeginName() : ""); - std::string LB = (B ? B->getLabelBeginName() : ""); - return LA < LB; -} - // Add the various names to the Dwarf accelerator table names. // TODO: Determine whether or not we should add names for programs // that do not have a DW_AT_name or DW_AT_linkage_name field - this // is only slightly different than the lookup of non-standard ObjC names. -void DwarfDebug::addSubprogramNames(DISubprogram SP, DIE &Die) { - if (!SP.isDefinition()) +void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) { + if (!SP->isDefinition()) return; - addAccelName(SP.getName(), Die); + addAccelName(SP->getName(), Die); // If the linkage name is different than the name, go ahead and output // that as well into the name table. - if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName()) - addAccelName(SP.getLinkageName(), Die); + if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName()) + addAccelName(SP->getLinkageName(), Die); // If this is an Objective-C selector name add it to the ObjC accelerator // too. - if (isObjCClass(SP.getName())) { + if (isObjCClass(SP->getName())) { StringRef Class, Category; - getObjCClassCategory(SP.getName(), Class, Category); + getObjCClassCategory(SP->getName(), Class, Category); addAccelObjC(Class, Die); if (Category != "") addAccelObjC(Category, Die); // Also add the base method name to the name table. - addAccelName(getObjCMethodName(SP.getName()), Die); + addAccelName(getObjCMethodName(SP->getName()), Die); } } @@ -303,11 +305,10 @@ void DwarfDebug::addSubprogramNames(DISubprogram SP, DIE &Die) { bool DwarfDebug::isSubprogramContext(const MDNode *Context) { if (!Context) return false; - DIDescriptor D(Context); - if (D.isSubprogram()) + if (isa<DISubprogram>(Context)) return true; - if (D.isType()) - return isSubprogramContext(resolve(DIType(Context).getContext())); + if (auto *T = dyn_cast<DIType>(Context)) + return isSubprogramContext(resolve(T->getScope())); return false; } @@ -362,9 +363,10 @@ void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const { // Create new DwarfCompileUnit for the given metadata node with tag // DW_TAG_compile_unit. 
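A worked example may help with the accelerator-table naming above (the selector is hypothetical): for a subprogram named "+[NSString(MyAdditions) reversed]", isObjCClass() matches the leading "+", and addSubprogramNames() ends up doing roughly

    addAccelObjC(Class, Die);                             // "NSString"
    addAccelObjC(Category, Die);                          // the category form, when present
    addAccelName(getObjCMethodName(SP->getName()), Die);  // "reversed"

so a debugger lookup by class or by bare selector reaches the same DIE as one by the full name.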
-DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { - StringRef FN = DIUnit.getFilename(); - CompilationDir = DIUnit.getDirectory(); +DwarfCompileUnit & +DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { + StringRef FN = DIUnit->getFilename(); + CompilationDir = DIUnit->getDirectory(); auto OwnedUnit = make_unique<DwarfCompileUnit>( InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder); @@ -378,17 +380,17 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { // To avoid the compilation directory being ambiguous, let the line table // explicitly describe the directory of all files, never relying on the // compilation directory. - if (!Asm->OutStreamer.hasRawTextSupport() || SingleCU) - Asm->OutStreamer.getContext().setMCLineTableCompilationDir( + if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU) + Asm->OutStreamer->getContext().setMCLineTableCompilationDir( NewCU.getUniqueID(), CompilationDir); - NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer()); + NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit->getProducer()); NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, - DIUnit.getLanguage()); + DIUnit->getSourceLanguage()); NewCU.addString(Die, dwarf::DW_AT_name, FN); if (!useSplitDwarf()) { - NewCU.initStmtList(DwarfLineSectionSym); + NewCU.initStmtList(); // If we're using split dwarf the compilation dir is going to be in the // skeleton CU and so we don't need to duplicate it here. @@ -398,23 +400,21 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { addGnuPubAttributes(NewCU, Die); } - if (DIUnit.isOptimized()) + if (DIUnit->isOptimized()) NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized); - StringRef Flags = DIUnit.getFlags(); + StringRef Flags = DIUnit->getFlags(); if (!Flags.empty()) NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags); - if (unsigned RVer = DIUnit.getRunTimeVersion()) + if (unsigned RVer = DIUnit->getRuntimeVersion()) NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); if (useSplitDwarf()) - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(), - DwarfInfoDWOSectionSym); + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection()); else - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(), - DwarfInfoSectionSym); + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection()); CUMap.insert(std::make_pair(DIUnit, &NewCU)); CUDieMap.insert(std::make_pair(&Die, &NewCU)); @@ -422,11 +422,9 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { } void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, - const MDNode *N) { - DIImportedEntity Module(N); - assert(Module.Verify()); - if (DIE *D = TheCU.getOrCreateContextDIE(Module.getContext())) - D->addChild(TheCU.constructImportedEntityDIE(Module)); + const DIImportedEntity *N) { + if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope())) + D->addChild(TheCU.constructImportedEntityDIE(N)); } // Emit all Dwarf sections that should come prior to the content. Create @@ -445,54 +443,40 @@ void DwarfDebug::beginModule() { return; TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes); - // Emit initial sections so we can reference labels later. 
- emitSectionLabels(); - SingleCU = CU_Nodes->getNumOperands() == 1; for (MDNode *N : CU_Nodes->operands()) { - DICompileUnit CUNode(N); + auto *CUNode = cast<DICompileUnit>(N); DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode); - DIArray ImportedEntities = CUNode.getImportedEntities(); - for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) - ScopesWithImportedEntities.push_back(std::make_pair( - DIImportedEntity(ImportedEntities.getElement(i)).getContext(), - ImportedEntities.getElement(i))); - std::sort(ScopesWithImportedEntities.begin(), - ScopesWithImportedEntities.end(), less_first()); - DIArray GVs = CUNode.getGlobalVariables(); - for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) - CU.getOrCreateGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); - DIArray SPs = CUNode.getSubprograms(); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) - SPMap.insert(std::make_pair(SPs.getElement(i), &CU)); - DIArray EnumTypes = CUNode.getEnumTypes(); - for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) { - DIType Ty(EnumTypes.getElement(i)); + for (auto *IE : CUNode->getImportedEntities()) + ScopesWithImportedEntities.push_back(std::make_pair(IE->getScope(), IE)); + // Stable sort to preserve the order of appearance of imported entities. + // This is to avoid out-of-order processing of interdependent declarations + // within the same scope, e.g. { namespace A = base; namespace B = A; } + std::stable_sort(ScopesWithImportedEntities.begin(), + ScopesWithImportedEntities.end(), less_first()); + for (auto *GV : CUNode->getGlobalVariables()) + CU.getOrCreateGlobalVariableDIE(GV); + for (auto *SP : CUNode->getSubprograms()) + SPMap.insert(std::make_pair(SP, &CU)); + for (auto *Ty : CUNode->getEnumTypes()) { // The enum types array by design contains pointers to // MDNodes rather than DIRefs. Unique them here. - DIType UniqueTy(resolve(Ty.getRef())); - CU.getOrCreateTypeDIE(UniqueTy); + CU.getOrCreateTypeDIE(cast<DIType>(resolve(Ty->getRef()))); } - DIArray RetainedTypes = CUNode.getRetainedTypes(); - for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) { - DIType Ty(RetainedTypes.getElement(i)); + for (auto *Ty : CUNode->getRetainedTypes()) { // The retained types array by design contains pointers to // MDNodes rather than DIRefs. Unique them here. - DIType UniqueTy(resolve(Ty.getRef())); - CU.getOrCreateTypeDIE(UniqueTy); + CU.getOrCreateTypeDIE(cast<DIType>(resolve(Ty->getRef()))); } // Emit imported_modules last so that the relevant context is already // available. - for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) - constructAndAddImportedEntityDIE(CU, ImportedEntities.getElement(i)); + for (auto *IE : CUNode->getImportedEntities()) + constructAndAddImportedEntityDIE(CU, IE); } // Tell MMI that we have debug info. MMI->setDebugInfoAvailability(true); - - // Prime section data. - SectionMap[Asm->getObjFileLowering().getTextSection()]; } void DwarfDebug::finishVariableDefinitions() { @@ -504,7 +488,8 @@ void DwarfDebug::finishVariableDefinitions() { // DIE::getUnit isn't simple - it walks parent pointers, etc. 
DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit()); assert(Unit); - DbgVariable *AbsVar = getExistingAbstractVariable(Var->getVariable()); + DbgVariable *AbsVar = getExistingAbstractVariable( + InlinedVariable(Var->getVariable(), Var->getInlinedAt())); if (AbsVar && AbsVar->getDIE()) { Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, *AbsVar->getDIE()); @@ -516,7 +501,7 @@ void DwarfDebug::finishVariableDefinitions() { void DwarfDebug::finishSubprogramDefinitions() { for (const auto &P : SPMap) forBothCUs(*P.second, [&](DwarfCompileUnit &CU) { - CU.finishSubprogramDefinition(DISubprogram(P.first)); + CU.finishSubprogramDefinition(cast<DISubprogram>(P.first)); }); } @@ -527,14 +512,12 @@ void DwarfDebug::collectDeadVariables() { if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) { for (MDNode *N : CU_Nodes->operands()) { - DICompileUnit TheCU(N); + auto *TheCU = cast<DICompileUnit>(N); // Construct subprogram DIE and add variables DIEs. DwarfCompileUnit *SPCU = static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU)); assert(SPCU && "Unable to find Compile Unit!"); - DIArray Subprograms = TheCU.getSubprograms(); - for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { - DISubprogram SP(Subprograms.getElement(i)); + for (auto *SP : TheCU->getSubprograms()) { if (ProcessedSPNodes.count(SP) != 0) continue; SPCU->collectDeadVariables(SP); @@ -544,6 +527,8 @@ void DwarfDebug::collectDeadVariables() { } void DwarfDebug::finalizeModuleInfo() { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + finishSubprogramDefinitions(); finishVariableDefinitions(); @@ -573,13 +558,16 @@ void DwarfDebug::finalizeModuleInfo() { // We don't keep track of which addresses are used in which CU so this // is a bit pessimistic under LTO. - if (!AddrPool.isEmpty()) + if (!AddrPool.isEmpty()) { + const MCSymbol *Sym = TLOF.getDwarfAddrSection()->getBeginSymbol(); SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base, - DwarfAddrSectionSym, DwarfAddrSectionSym); - if (!SkCU->getRangeLists().empty()) + Sym, Sym); + } + if (!SkCU->getRangeLists().empty()) { + const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol(); SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base, - DwarfDebugRangeSectionSym, - DwarfDebugRangeSectionSym); + Sym, Sym); + } } // If we have code split among multiple sections or non-contiguous @@ -597,7 +585,7 @@ void DwarfDebug::finalizeModuleInfo() { // 2.17.3). U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); else - TheCU.setBaseAddress(TheCU.getRanges().front().getStart()); + U.setBaseAddress(TheCU.getRanges().front().getStart()); U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges()); } } @@ -608,53 +596,6 @@ void DwarfDebug::finalizeModuleInfo() { SkeletonHolder.computeSizeAndOffsets(); } -void DwarfDebug::endSections() { - // Filter labels by section. - for (const SymbolCU &SCU : ArangeLabels) { - if (SCU.Sym->isInSection()) { - // Make a note of this symbol and it's section. - const MCSection *Section = &SCU.Sym->getSection(); - if (!Section->getKind().isMetadata()) - SectionMap[Section].push_back(SCU); - } else { - // Some symbols (e.g. common/bss on mach-o) can have no section but still - // appear in the output. This sucks as we rely on sections to build - // arange spans. We can do it without, but it's icky. - SectionMap[nullptr].push_back(SCU); - } - } - - // Build a list of sections used. 
- std::vector<const MCSection *> Sections; - for (const auto &it : SectionMap) { - const MCSection *Section = it.first; - Sections.push_back(Section); - } - - // Sort the sections into order. - // This is only done to ensure consistent output order across different runs. - std::sort(Sections.begin(), Sections.end(), SectionSort); - - // Add terminating symbols for each section. - for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) { - const MCSection *Section = Sections[ID]; - MCSymbol *Sym = nullptr; - - if (Section) { - // We can't call MCSection::getLabelEndName, as it's only safe to do so - // if we know the section name up-front. For user-created sections, the - // resulting label may not be valid to use as a label. (section names can - // use a greater set of characters on some systems) - Sym = Asm->GetTempSymbol("debug_end", ID); - Asm->OutStreamer.SwitchSection(Section); - Asm->OutStreamer.EmitLabel(Sym); - } - - // Insert a final terminator. - SectionMap[Section].push_back(SymbolCU(nullptr, Sym)); - } -} - // Emit all Dwarf sections that should come after the content. void DwarfDebug::endModule() { assert(CurFn == nullptr); @@ -663,24 +604,26 @@ void DwarfDebug::endModule() { // If we aren't actually generating debug info (check beginModule - // conditionalized on !DisableDebugInfoPrinting and the presence of the // llvm.dbg.cu metadata node) - if (!DwarfInfoSectionSym) + if (!MMI->hasDebugInfo()) return; - // End any existing sections. - // TODO: Does this need to happen? - endSections(); - // Finalize the debug info for the module. finalizeModuleInfo(); emitDebugStr(); - // Emit all the DIEs into a debug info section. - emitDebugInfo(); + if (useSplitDwarf()) + emitDebugLocDWO(); + else + // Emit info into a debug loc section. + emitDebugLoc(); // Corresponding abbreviations into a abbrev section. emitAbbreviations(); + // Emit all the DIEs into a debug info section. + emitDebugInfo(); + // Emit info into a debug aranges section. if (GenerateARangeSection) emitDebugARanges(); @@ -693,12 +636,9 @@ void DwarfDebug::endModule() { emitDebugInfoDWO(); emitDebugAbbrevDWO(); emitDebugLineDWO(); - emitDebugLocDWO(); // Emit DWO addresses. AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); - } else - // Emit info into a debug loc section. - emitDebugLoc(); + } // Emit info into the dwarf accelerator table sections. if (useDwarfAccelTables()) { @@ -720,80 +660,80 @@ void DwarfDebug::endModule() { } // Find abstract variable, if any, associated with Var. -DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV, - DIVariable &Cleansed) { - LLVMContext &Ctx = DV->getContext(); +DbgVariable * +DwarfDebug::getExistingAbstractVariable(InlinedVariable IV, + const DILocalVariable *&Cleansed) { // More then one inlined variable corresponds to one abstract variable. - // FIXME: This duplication of variables when inlining should probably be - // removed. It's done to allow each DIVariable to describe its location - // because the DebugLoc on the dbg.value/declare isn't accurate. We should - // make it accurate then remove this duplication/cleansing stuff. 
- Cleansed = cleanseInlinedVariable(DV, Ctx); + Cleansed = IV.first; auto I = AbstractVariables.find(Cleansed); if (I != AbstractVariables.end()) return I->second.get(); return nullptr; } -DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) { - DIVariable Cleansed; - return getExistingAbstractVariable(DV, Cleansed); +DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) { + const DILocalVariable *Cleansed; + return getExistingAbstractVariable(IV, Cleansed); } -void DwarfDebug::createAbstractVariable(const DIVariable &Var, +void DwarfDebug::createAbstractVariable(const DILocalVariable *Var, LexicalScope *Scope) { - auto AbsDbgVariable = make_unique<DbgVariable>(Var, DIExpression(), this); + auto AbsDbgVariable = + make_unique<DbgVariable>(Var, /* IA */ nullptr, /* Expr */ nullptr, this); InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get()); AbstractVariables[Var] = std::move(AbsDbgVariable); } -void DwarfDebug::ensureAbstractVariableIsCreated(const DIVariable &DV, +void DwarfDebug::ensureAbstractVariableIsCreated(InlinedVariable IV, const MDNode *ScopeNode) { - DIVariable Cleansed = DV; - if (getExistingAbstractVariable(DV, Cleansed)) + const DILocalVariable *Cleansed = nullptr; + if (getExistingAbstractVariable(IV, Cleansed)) return; - createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(ScopeNode)); + createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope( + cast<DILocalScope>(ScopeNode))); } -void -DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV, - const MDNode *ScopeNode) { - DIVariable Cleansed = DV; - if (getExistingAbstractVariable(DV, Cleansed)) +void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped( + InlinedVariable IV, const MDNode *ScopeNode) { + const DILocalVariable *Cleansed = nullptr; + if (getExistingAbstractVariable(IV, Cleansed)) return; - if (LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode)) + if (LexicalScope *Scope = + LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode))) createAbstractVariable(Cleansed, Scope); } // Collect variable information from side table maintained by MMI. void DwarfDebug::collectVariableInfoFromMMITable( - SmallPtrSetImpl<const MDNode *> &Processed) { + DenseSet<InlinedVariable> &Processed) { for (const auto &VI : MMI->getVariableDbgInfo()) { if (!VI.Var) continue; - Processed.insert(VI.Var); + assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) && + "Expected inlined-at fields to agree"); + + InlinedVariable Var(VI.Var, VI.Loc->getInlinedAt()); + Processed.insert(Var); LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc); // If variable scope is not found then skip this variable. if (!Scope) continue; - DIVariable DV(VI.Var); - DIExpression Expr(VI.Expr); - ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); - ConcreteVariables.push_back(make_unique<DbgVariable>(DV, Expr, this)); - DbgVariable *RegVar = ConcreteVariables.back().get(); - RegVar->setFrameIndex(VI.Slot); - InfoHolder.addScopeVariable(Scope, RegVar); + const DIExpression *Expr = cast_or_null<DIExpression>(VI.Expr); + ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode()); + auto RegVar = + make_unique<DbgVariable>(Var.first, Var.second, Expr, this, VI.Slot); + if (InfoHolder.addScopeVariable(Scope, RegVar.get())) + ConcreteVariables.push_back(std::move(RegVar)); } } // Get .debug_loc entry for the instruction range starting at MI. 
static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
-  const MDNode *Expr = MI->getDebugExpression();
-  const MDNode *Var = MI->getDebugVariable();
+  const DIExpression *Expr = MI->getDebugExpression();

  assert(MI->getNumOperands() == 4);
  if (MI->getOperand(0).isReg()) {
@@ -804,26 +744,26 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI)
      MLoc.set(MI->getOperand(0).getReg());
    else
      MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
-    return DebugLocEntry::Value(Var, Expr, MLoc);
+    return DebugLocEntry::Value(Expr, MLoc);
  }
  if (MI->getOperand(0).isImm())
-    return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getImm());
+    return DebugLocEntry::Value(Expr, MI->getOperand(0).getImm());
  if (MI->getOperand(0).isFPImm())
-    return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getFPImm());
+    return DebugLocEntry::Value(Expr, MI->getOperand(0).getFPImm());
  if (MI->getOperand(0).isCImm())
-    return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getCImm());
+    return DebugLocEntry::Value(Expr, MI->getOperand(0).getCImm());

  llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
}

/// Determine whether two variable pieces overlap.
-static bool piecesOverlap(DIExpression P1, DIExpression P2) {
-  if (!P1.isVariablePiece() || !P2.isVariablePiece())
+static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2) {
+  if (!P1->isBitPiece() || !P2->isBitPiece())
    return true;
-  unsigned l1 = P1.getPieceOffset();
-  unsigned l2 = P2.getPieceOffset();
-  unsigned r1 = l1 + P1.getPieceSize();
-  unsigned r2 = l2 + P2.getPieceSize();
+  unsigned l1 = P1->getBitPieceOffset();
+  unsigned l2 = P2->getBitPieceOffset();
+  unsigned r1 = l1 + P1->getBitPieceSize();
+  unsigned r2 = l2 + P2->getBitPieceSize();
  // True where [l1,r1[ and [l2,r2[ overlap.
  return (l1 < r2) && (l2 < r1);
}
@@ -842,7 +782,8 @@ static bool piecesOverlap(DIExpression P1, DIExpression P2) {
// 1 | |    [x, (reg1, piece 32, 32)] <- IsPieceOfPrevEntry
// 2 | |    ...
// 3   |    [clobber reg0]
-// 4        [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of x.
+// 4        [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of
+//                                     x.
//
// Output:
//
@@ -868,7 +809,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
    }

    // If this piece overlaps with any open ranges, truncate them.
-    DIExpression DIExpr = Begin->getDebugExpression();
+    const DIExpression *DIExpr = Begin->getDebugExpression();
    auto Last = std::remove_if(OpenRanges.begin(), OpenRanges.end(),
                               [&](DebugLocEntry::Value R) {
      return piecesOverlap(DIExpr, R.getExpression());
    });
@@ -882,7 +823,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
    if (End != nullptr)
      EndLabel = getLabelAfterInsn(End);
    else if (std::next(I) == Ranges.end())
-      EndLabel = FunctionEndSym;
+      EndLabel = Asm->getFunctionEnd();
    else
      EndLabel = getLabelBeforeInsn(std::next(I)->first);
    assert(EndLabel && "Forgot label after instruction ending a range!");
@@ -894,7 +835,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
    bool couldMerge = false;

    // If this is a piece, it may belong to the current DebugLocEntry.
-    if (DIExpr.isVariablePiece()) {
+    if (DIExpr->isBitPiece()) {
      // Add this value to the list of open ranges.
      OpenRanges.push_back(Value);

@@ -916,54 +857,50 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
  // Attempt to coalesce the ranges of two otherwise identical
  // DebugLocEntries.
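A quick worked check of the overlap predicate above, using half-open bit ranges: pieces [0,32) and [16,48) give l1=0, r1=32, l2=16, r2=48, so (l1 < r2) && (l2 < r1) holds and the open range gets truncated; adjacent pieces [0,32) and [32,64) fail l2 < r1 (32 < 32 is false), so they correctly do not overlap. Because the non-piece case returns true, an unsplit location conservatively clobbers every open piece of the variable.

    [0,32) vs [16,48):  0 < 48 && 16 < 32  ->  overlap
    [0,32) vs [32,64):  0 < 64 && 32 < 32  ->  disjoint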
auto CurEntry = DebugLoc.rbegin(); - auto PrevEntry = std::next(CurEntry); - if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry)) - DebugLoc.pop_back(); - DEBUG({ dbgs() << CurEntry->getValues().size() << " Values:\n"; - for (auto Value : CurEntry->getValues()) { - Value.getVariable()->dump(); + for (auto &Value : CurEntry->getValues()) Value.getExpression()->dump(); - } dbgs() << "-----\n"; }); + + auto PrevEntry = std::next(CurEntry); + if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry)) + DebugLoc.pop_back(); } } // Find variables for each lexical scope. -void -DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, - SmallPtrSetImpl<const MDNode *> &Processed) { +void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, + const DISubprogram *SP, + DenseSet<InlinedVariable> &Processed) { // Grab the variable info that was squirreled away in the MMI side-table. collectVariableInfoFromMMITable(Processed); for (const auto &I : DbgValues) { - DIVariable DV(I.first); - if (Processed.count(DV)) + InlinedVariable IV = I.first; + if (Processed.count(IV)) continue; - // Instruction ranges, specifying where DV is accessible. + // Instruction ranges, specifying where IV is accessible. const auto &Ranges = I.second; if (Ranges.empty()) continue; LexicalScope *Scope = nullptr; - if (MDNode *IA = DV.getInlinedAt()) { - DebugLoc DL = DebugLoc::getFromDILocation(IA); - Scope = LScopes.findInlinedScope(DebugLoc::get( - DL.getLine(), DL.getCol(), DV.getContext(), IA)); - } else - Scope = LScopes.findLexicalScope(DV.getContext()); + if (const DILocation *IA = IV.second) + Scope = LScopes.findInlinedScope(IV.first->getScope(), IA); + else + Scope = LScopes.findLexicalScope(IV.first->getScope()); // If variable scope is not found then skip this variable. if (!Scope) continue; - Processed.insert(DV); + Processed.insert(IV); const MachineInstr *MInsn = Ranges.front().first; assert(MInsn->isDebugValue() && "History must begin with debug value"); - ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); + ensureAbstractVariableIsCreatedIfScoped(IV, Scope->getScopeNode()); ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this)); DbgVariable *RegVar = ConcreteVariables.back().get(); InfoHolder.addScopeVariable(Scope, RegVar); @@ -973,29 +910,33 @@ DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, continue; // Handle multiple DBG_VALUE instructions describing one variable. - RegVar->setDotDebugLocOffset(DotDebugLocEntries.size()); - - DotDebugLocEntries.resize(DotDebugLocEntries.size() + 1); - DebugLocList &LocList = DotDebugLocEntries.back(); - LocList.CU = &TheCU; - LocList.Label = - Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1); + RegVar->setDebugLocListIndex( + DebugLocs.startList(&TheCU, Asm->createTempSymbol("debug_loc"))); // Build the location list for this variable. - buildLocationList(LocList.List, Ranges); + SmallVector<DebugLocEntry, 8> Entries; + buildLocationList(Entries, Ranges); + + // If the variable has an DIBasicType, extract it. Basic types cannot have + // unique identifiers, so don't bother resolving the type with the + // identifier map. + const DIBasicType *BT = dyn_cast<DIBasicType>( + static_cast<const Metadata *>(IV.first->getType())); + + // Finalize the entry by lowering it into a DWARF bytestream. + for (auto &Entry : Entries) + Entry.finalize(*Asm, DebugLocs, BT); } // Collect info for variables that were optimized out. 
- DIArray Variables = SP.getVariables(); - for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { - DIVariable DV(Variables.getElement(i)); - assert(DV.isVariable()); - if (!Processed.insert(DV).second) + for (const DILocalVariable *DV : SP->getVariables()) { + if (!Processed.insert(InlinedVariable(DV, nullptr)).second) continue; - if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) { - ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); - DIExpression NoExpr; - ConcreteVariables.push_back(make_unique<DbgVariable>(DV, NoExpr, this)); + if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope())) { + ensureAbstractVariableIsCreatedIfScoped(InlinedVariable(DV, nullptr), + Scope->getScopeNode()); + ConcreteVariables.push_back(make_unique<DbgVariable>( + DV, /* IA */ nullptr, /* Expr */ nullptr, this)); InfoHolder.addScopeVariable(Scope, ConcreteVariables.back().get()); } } @@ -1020,23 +961,25 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { // Check if source location changes, but ignore DBG_VALUE locations. if (!MI->isDebugValue()) { DebugLoc DL = MI->getDebugLoc(); - if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) { - unsigned Flags = 0; - PrevInstLoc = DL; - if (DL == PrologEndLoc) { - Flags |= DWARF2_FLAG_PROLOGUE_END; - PrologEndLoc = DebugLoc(); - Flags |= DWARF2_FLAG_IS_STMT; - } - if (DL.getLine() != - Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine()) - Flags |= DWARF2_FLAG_IS_STMT; + if (DL != PrevInstLoc) { + if (DL) { + unsigned Flags = 0; + PrevInstLoc = DL; + if (DL == PrologEndLoc) { + Flags |= DWARF2_FLAG_PROLOGUE_END; + PrologEndLoc = DebugLoc(); + Flags |= DWARF2_FLAG_IS_STMT; + } + if (DL.getLine() != + Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine()) + Flags |= DWARF2_FLAG_IS_STMT; - if (!DL.isUnknown()) { - const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); + const MDNode *Scope = DL.getScope(); recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); - } else + } else if (UnknownLocations) { + PrevInstLoc = DL; recordSourceLine(0, 0, nullptr, 0); + } } } @@ -1053,8 +996,8 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { return; if (!PrevLabel) { - PrevLabel = MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(PrevLabel); + PrevLabel = MMI->getContext().createTempSymbol(); + Asm->OutStreamer->EmitLabel(PrevLabel); } I->second = PrevLabel; } @@ -1081,8 +1024,8 @@ void DwarfDebug::endInstruction() { // We need a label after this instruction. if (!PrevLabel) { - PrevLabel = MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(PrevLabel); + PrevLabel = MMI->getContext().createTempSymbol(); + Asm->OutStreamer->EmitLabel(PrevLabel); } I->second = PrevLabel; } @@ -1119,7 +1062,7 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { for (const auto &MBB : *MF) for (const auto &MI : MBB) if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && - !MI.getDebugLoc().isUnknown()) { + MI.getDebugLoc()) { // Did the target forget to set the FrameSetup flag for CFI insns? 
assert(!MI.isCFIInstruction() && "First non-frame-setup instruction is a CFI instruction."); @@ -1166,19 +1109,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // is absolute (such as an <> lookup header))) DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); assert(TheCU && "Unable to find compile unit!"); - if (Asm->OutStreamer.hasRawTextSupport()) + if (Asm->OutStreamer->hasRawTextSupport()) // Use a single line table if we are generating assembly. - Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); + Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); else - Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); - - // Emit a label for the function so that we have a beginning address. - FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(FunctionBeginSym); + Asm->OutStreamer->getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); // Calculate history for local variables. - calculateDbgValueHistory(MF, Asm->TM.getSubtargetImpl()->getRegisterInfo(), + calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(), DbgValues); // Request labels for the full history. @@ -1187,21 +1125,21 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (Ranges.empty()) continue; - // The first mention of a function argument gets the FunctionBeginSym + // The first mention of a function argument gets the CurrentFnBegin // label, so arguments are visible when breaking at function entry. - DIVariable DIVar(Ranges.front().first->getDebugVariable()); - if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable && - getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) { - LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym; - if (Ranges.front().first->getDebugExpression().isVariablePiece()) { + const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable(); + if (DIVar->getTag() == dwarf::DW_TAG_arg_variable && + getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) { + LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin(); + if (Ranges.front().first->getDebugExpression()->isBitPiece()) { // Mark all non-overlapping initial pieces. for (auto I = Ranges.begin(); I != Ranges.end(); ++I) { - DIExpression Piece = I->first->getDebugExpression(); + const DIExpression *Piece = I->first->getDebugExpression(); if (std::all_of(Ranges.begin(), I, [&](DbgValueHistoryMap::InstrRange Pred) { return !piecesOverlap(Piece, Pred.first->getDebugExpression()); })) - LabelsBeforeInsn[I->first] = FunctionBeginSym; + LabelsBeforeInsn[I->first] = Asm->getFunctionBegin(); else break; } @@ -1216,19 +1154,15 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { } PrevInstLoc = DebugLoc(); - PrevLabel = FunctionBeginSym; + PrevLabel = Asm->getFunctionBegin(); // Record beginning of function. PrologEndLoc = findPrologueEndLoc(MF); - if (!PrologEndLoc.isUnknown()) { - DebugLoc FnStartDL = - PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext()); - recordSourceLine( - FnStartDL.getLine(), FnStartDL.getCol(), - FnStartDL.getScope(MF->getFunction()->getContext()), - // We'd like to list the prologue as "not statements" but GDB behaves - // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. - DWARF2_FLAG_IS_STMT); + if (DILocation *L = PrologEndLoc) { + // We'd like to list the prologue as "not statements" but GDB behaves + // poorly if we do that. 
Revisit this with caution/GDB (7.5+) testing. + auto *SP = L->getInlinedAtScope()->getSubprogram(); + recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT); } } @@ -1247,27 +1181,22 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { return; } - // Define end label for subprogram. - FunctionEndSym = Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()); - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(FunctionEndSym); - // Set DwarfDwarfCompileUnitID in MCContext to default value. - Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); + Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - DISubprogram SP(FnScope->getScopeNode()); + auto *SP = cast<DISubprogram>(FnScope->getScopeNode()); DwarfCompileUnit &TheCU = *SPMap.lookup(SP); - SmallPtrSet<const MDNode *, 16> ProcessedVars; + DenseSet<InlinedVariable> ProcessedVars; collectVariableInfo(TheCU, SP, ProcessedVars); // Add the range of this function to the list of ranges for the CU. - TheCU.addRange(RangeSpan(FunctionBeginSym, FunctionEndSym)); + TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd())); // Under -gmlt, skip building the subprogram if there are no inlined // subroutines inside it. - if (TheCU.getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly && + if (TheCU.getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly && LScopes.getAbstractScopesList().empty() && !IsDarwin) { assert(InfoHolder.getScopeVariables().empty()); assert(DbgValues.empty()); @@ -1286,16 +1215,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { #endif // Construct abstract scopes. for (LexicalScope *AScope : LScopes.getAbstractScopesList()) { - DISubprogram SP(AScope->getScopeNode()); - assert(SP.isSubprogram()); + auto *SP = cast<DISubprogram>(AScope->getScopeNode()); // Collect info for variables that were optimized out. 
- DIArray Variables = SP.getVariables(); - for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { - DIVariable DV(Variables.getElement(i)); - assert(DV && DV.isVariable()); - if (!ProcessedVars.insert(DV).second) + for (const DILocalVariable *DV : SP->getVariables()) { + if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second) continue; - ensureAbstractVariableIsCreated(DV, DV.getContext()); + ensureAbstractVariableIsCreated(InlinedVariable(DV, nullptr), + DV->getScope()); assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes && "ensureAbstractVariableIsCreated inserted abstract scopes"); } @@ -1327,122 +1253,28 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, StringRef Dir; unsigned Src = 1; unsigned Discriminator = 0; - if (DIScope Scope = DIScope(S)) { - assert(Scope.isScope()); - Fn = Scope.getFilename(); - Dir = Scope.getDirectory(); - if (Scope.isLexicalBlockFile()) - Discriminator = DILexicalBlockFile(S).getDiscriminator(); - - unsigned CUID = Asm->OutStreamer.getContext().getDwarfCompileUnitID(); + if (auto *Scope = cast_or_null<DIScope>(S)) { + Fn = Scope->getFilename(); + Dir = Scope->getDirectory(); + if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope)) + Discriminator = LBF->getDiscriminator(); + + unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID(); Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID]) .getOrCreateSourceID(Fn, Dir); } - Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, - Discriminator, Fn); + Asm->OutStreamer->EmitDwarfLocDirective(Src, Line, Col, Flags, 0, + Discriminator, Fn); } //===----------------------------------------------------------------------===// // Emit Methods //===----------------------------------------------------------------------===// -// Emit initial Dwarf sections with a label at the start of each one. -void DwarfDebug::emitSectionLabels() { - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - - // Dwarf sections base addresses. 
- DwarfInfoSectionSym = - emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); - if (useSplitDwarf()) { - DwarfInfoDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo"); - DwarfTypesDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo"); - } - DwarfAbbrevSectionSym = - emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); - if (useSplitDwarf()) - DwarfAbbrevDWOSectionSym = emitSectionSym( - Asm, TLOF.getDwarfAbbrevDWOSection(), "section_abbrev_dwo"); - if (GenerateARangeSection) - emitSectionSym(Asm, TLOF.getDwarfARangesSection()); - - DwarfLineSectionSym = - emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); - if (GenerateGnuPubSections) { - DwarfGnuPubNamesSectionSym = - emitSectionSym(Asm, TLOF.getDwarfGnuPubNamesSection()); - DwarfGnuPubTypesSectionSym = - emitSectionSym(Asm, TLOF.getDwarfGnuPubTypesSection()); - } else if (HasDwarfPubSections) { - emitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); - emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); - } - - DwarfStrSectionSym = - emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); - if (useSplitDwarf()) { - DwarfStrDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string"); - DwarfAddrSectionSym = - emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec"); - DwarfDebugLocSectionSym = - emitSectionSym(Asm, TLOF.getDwarfLocDWOSection(), "skel_loc"); - } else - DwarfDebugLocSectionSym = - emitSectionSym(Asm, TLOF.getDwarfLocSection(), "section_debug_loc"); - DwarfDebugRangeSectionSym = - emitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range"); -} - -// Recursively emits a debug information entry. -void DwarfDebug::emitDIE(DIE &Die) { - // Get the abbreviation for this DIE. - const DIEAbbrev &Abbrev = Die.getAbbrev(); - - // Emit the code (index) for the abbreviation. - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) + - "] 0x" + Twine::utohexstr(Die.getOffset()) + - ":0x" + Twine::utohexstr(Die.getSize()) + " " + - dwarf::TagString(Abbrev.getTag())); - Asm->EmitULEB128(Abbrev.getNumber()); - - const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); - const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); - - // Emit the DIE attribute values. - for (unsigned i = 0, N = Values.size(); i < N; ++i) { - dwarf::Attribute Attr = AbbrevData[i].getAttribute(); - dwarf::Form Form = AbbrevData[i].getForm(); - assert(Form && "Too many attributes for DIE (check abbreviation)"); - - if (Asm->isVerbose()) { - Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr)); - if (Attr == dwarf::DW_AT_accessibility) - Asm->OutStreamer.AddComment(dwarf::AccessibilityString( - cast<DIEInteger>(Values[i])->getValue())); - } - - // Emit an attribute using the defined form. - Values[i]->EmitValue(Asm, Form); - } - - // Emit the DIE children if any. - if (Abbrev.hasChildren()) { - for (auto &Child : Die.getChildren()) - emitDIE(*Child); - - Asm->OutStreamer.AddComment("End Of Children Mark"); - Asm->EmitInt8(0); - } -} - // Emit the debug info section. void DwarfDebug::emitDebugInfo() { DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - - Holder.emitUnits(DwarfAbbrevSectionSym); + Holder.emitUnits(/* UseOffsets */ false); } // Emit the abbreviation section. 
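Although emitDIE's body has moved out of this file, the encoding it implements is worth keeping in mind when reading emitDebugInfo() above: each DIE is a ULEB128 abbreviation code followed by its attribute values in exactly the forms its abbreviation declares, then, if the abbreviation has children, the child DIEs and a terminating zero byte. A minimal sketch (tags, forms, and values illustrative):

    .debug_abbrev:  abbrev 1: DW_TAG_compile_unit, DW_CHILDREN_yes,
                      DW_AT_producer DW_FORM_strp, DW_AT_name DW_FORM_strp, 0, 0
    .debug_info:    ULEB128(1), strp(producer), strp(name),
                      <child DIEs>, 0x00   // end-of-children mark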
@@ -1452,65 +1284,39 @@ void DwarfDebug::emitAbbreviations() { Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection()); } -// Emit the last address of the section and the end of the line matrix. -void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { - // Define last address of section. - Asm->OutStreamer.AddComment("Extended Op"); - Asm->EmitInt8(0); - - Asm->OutStreamer.AddComment("Op size"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1); - Asm->OutStreamer.AddComment("DW_LNE_set_address"); - Asm->EmitInt8(dwarf::DW_LNE_set_address); - - Asm->OutStreamer.AddComment("Section end label"); - - Asm->OutStreamer.EmitSymbolValue( - Asm->GetTempSymbol("section_end", SectionEnd), - Asm->getDataLayout().getPointerSize()); - - // Mark end of matrix. - Asm->OutStreamer.AddComment("DW_LNE_end_sequence"); - Asm->EmitInt8(0); - Asm->EmitInt8(1); - Asm->EmitInt8(1); -} - -void DwarfDebug::emitAccel(DwarfAccelTable &Accel, const MCSection *Section, - StringRef TableName, StringRef SymName) { +void DwarfDebug::emitAccel(DwarfAccelTable &Accel, MCSection *Section, + StringRef TableName) { Accel.FinalizeTable(Asm, TableName); - Asm->OutStreamer.SwitchSection(Section); - auto *SectionBegin = Asm->GetTempSymbol(SymName); - Asm->OutStreamer.EmitLabel(SectionBegin); + Asm->OutStreamer->SwitchSection(Section); // Emit the full data. - Accel.Emit(Asm, SectionBegin, this, DwarfStrSectionSym); + Accel.emit(Asm, Section->getBeginSymbol(), this); } // Emit visible names into a hashed accelerator table section. void DwarfDebug::emitAccelNames() { emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(), - "Names", "names_begin"); + "Names"); } // Emit objective C classes and categories into a hashed accelerator table // section. void DwarfDebug::emitAccelObjC() { emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(), - "ObjC", "objc_begin"); + "ObjC"); } // Emit namespace dies into a hashed accelerator table. void DwarfDebug::emitAccelNamespaces() { emitAccel(AccelNamespace, Asm->getObjFileLowering().getDwarfAccelNamespaceSection(), - "namespac", "namespac_begin"); + "namespac"); } // Emit type dies into a hashed accelerator table. void DwarfDebug::emitAccelTypes() { emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(), - "types", "types_begin"); + "types"); } // Public name handling. @@ -1559,7 +1365,6 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU, return dwarf::GIEK_TYPE; case dwarf::DW_TAG_subprogram: return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage); - case dwarf::DW_TAG_constant: case dwarf::DW_TAG_variable: return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage); case dwarf::DW_TAG_enumerator: @@ -1573,16 +1378,16 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU, /// emitDebugPubNames - Emit visible names into a debug pubnames section. /// void DwarfDebug::emitDebugPubNames(bool GnuStyle) { - const MCSection *PSec = - GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() - : Asm->getObjFileLowering().getDwarfPubNamesSection(); + MCSection *PSec = GnuStyle + ? 
Asm->getObjFileLowering().getDwarfGnuPubNamesSection() + : Asm->getObjFileLowering().getDwarfPubNamesSection(); emitDebugPubSection(GnuStyle, PSec, "Names", &DwarfCompileUnit::getGlobalNames); } void DwarfDebug::emitDebugPubSection( - bool GnuStyle, const MCSection *PSec, StringRef Name, + bool GnuStyle, MCSection *PSec, StringRef Name, const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const) { for (const auto &NU : CUMap) { DwarfCompileUnit *TheU = NU.second; @@ -1594,26 +1399,25 @@ void DwarfDebug::emitDebugPubSection( if (auto *Skeleton = TheU->getSkeleton()) TheU = Skeleton; - unsigned ID = TheU->getUniqueID(); // Start the dwarf pubnames section. - Asm->OutStreamer.SwitchSection(PSec); + Asm->OutStreamer->SwitchSection(PSec); // Emit the header. - Asm->OutStreamer.AddComment("Length of Public " + Name + " Info"); - MCSymbol *BeginLabel = Asm->GetTempSymbol("pub" + Name + "_begin", ID); - MCSymbol *EndLabel = Asm->GetTempSymbol("pub" + Name + "_end", ID); + Asm->OutStreamer->AddComment("Length of Public " + Name + " Info"); + MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin"); + MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end"); Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); - Asm->OutStreamer.EmitLabel(BeginLabel); + Asm->OutStreamer->EmitLabel(BeginLabel); - Asm->OutStreamer.AddComment("DWARF Version"); + Asm->OutStreamer->AddComment("DWARF Version"); Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); - Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym()); + Asm->OutStreamer->AddComment("Offset of Compilation Unit Info"); + Asm->emitSectionOffset(TheU->getLabelBegin()); - Asm->OutStreamer.AddComment("Compilation Unit Length"); + Asm->OutStreamer->AddComment("Compilation Unit Length"); Asm->EmitInt32(TheU->getLength()); // Emit the pubnames for this compilation unit. @@ -1621,31 +1425,31 @@ void DwarfDebug::emitDebugPubSection( const char *Name = GI.getKeyData(); const DIE *Entity = GI.second; - Asm->OutStreamer.AddComment("DIE offset"); + Asm->OutStreamer->AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); if (GnuStyle) { dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity); - Asm->OutStreamer.AddComment( + Asm->OutStreamer->AddComment( Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); Asm->EmitInt8(Desc.toBits()); } - Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, GI.getKeyLength() + 1)); + Asm->OutStreamer->AddComment("External Name"); + Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1)); } - Asm->OutStreamer.AddComment("End Mark"); + Asm->OutStreamer->AddComment("End Mark"); Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel(EndLabel); + Asm->OutStreamer->EmitLabel(EndLabel); } } void DwarfDebug::emitDebugPubTypes(bool GnuStyle) { - const MCSection *PSec = - GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection() - : Asm->getObjFileLowering().getDwarfPubTypesSection(); + MCSection *PSec = GnuStyle + ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection() + : Asm->getObjFileLowering().getDwarfPubTypesSection(); emitDebugPubSection(GnuStyle, PSec, "Types", &DwarfCompileUnit::getGlobalTypes); @@ -1657,86 +1461,44 @@ void DwarfDebug::emitDebugStr() { Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); } -/// Emits an optimal (=sorted) sequence of DW_OP_pieces. 
-void DwarfDebug::emitLocPieces(ByteStreamer &Streamer, - const DITypeIdentifierMap &Map, - ArrayRef<DebugLocEntry::Value> Values) { - assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) { - return P.isVariablePiece(); - }) && "all values are expected to be pieces"); - assert(std::is_sorted(Values.begin(), Values.end()) && - "pieces are expected to be sorted"); - - unsigned Offset = 0; - for (auto Piece : Values) { - const unsigned SizeOfByte = 8; - DIExpression Expr = Piece.getExpression(); - unsigned PieceOffset = Expr.getPieceOffset(); - unsigned PieceSize = Expr.getPieceSize(); - assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); - if (Offset < PieceOffset) { - // The DWARF spec seriously mandates pieces with no locations for gaps. - Asm->EmitDwarfOpPiece(Streamer, (PieceOffset-Offset)*SizeOfByte); - Offset += PieceOffset-Offset; - } - Offset += PieceSize; - -#ifndef NDEBUG - DIVariable Var = Piece.getVariable(); - assert(!Var.isIndirect() && "indirect address for piece"); - unsigned VarSize = Var.getSizeInBits(Map); - assert(PieceSize+PieceOffset <= VarSize/SizeOfByte - && "piece is larger than or outside of variable"); - assert(PieceSize*SizeOfByte != VarSize - && "piece covers entire variable"); -#endif - - emitDebugLocValue(Streamer, Piece, PieceOffset*SizeOfByte); - } -} - - void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, - const DebugLocEntry &Entry) { - const DebugLocEntry::Value Value = Entry.getValues()[0]; - if (Value.isVariablePiece()) - // Emit all pieces that belong to the same variable and range. - return emitLocPieces(Streamer, TypeIdentifierMap, Entry.getValues()); - - assert(Entry.getValues().size() == 1 && "only pieces may have >1 value"); - emitDebugLocValue(Streamer, Value); -} - -void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer, - const DebugLocEntry::Value &Value, - unsigned PieceOffsetInBits) { - DIVariable DV = Value.getVariable(); - DebugLocDwarfExpression DwarfExpr(*Asm, Streamer); - + const DebugLocStream::Entry &Entry) { + auto &&Comments = DebugLocs.getComments(Entry); + auto Comment = Comments.begin(); + auto End = Comments.end(); + for (uint8_t Byte : DebugLocs.getBytes(Entry)) + Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : ""); +} + +static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, + ByteStreamer &Streamer, + const DebugLocEntry::Value &Value, + unsigned PieceOffsetInBits) { + DebugLocDwarfExpression DwarfExpr(*AP.MF->getSubtarget().getRegisterInfo(), + AP.getDwarfDebug()->getDwarfVersion(), + Streamer); // Regular entry. if (Value.isInt()) { - DIBasicType BTy(resolve(DV.getType())); - if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed || - BTy.getEncoding() == dwarf::DW_ATE_signed_char)) + if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed || + BT->getEncoding() == dwarf::DW_ATE_signed_char)) DwarfExpr.AddSignedConstant(Value.getInt()); else DwarfExpr.AddUnsignedConstant(Value.getInt()); } else if (Value.isLocation()) { MachineLocation Loc = Value.getLoc(); - DIExpression Expr = Value.getExpression(); - if (!Expr || (Expr.getNumElements() == 0)) + const DIExpression *Expr = Value.getExpression(); + if (!Expr || !Expr->getNumElements()) // Regular entry. - Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect()); + AP.EmitDwarfRegOp(Streamer, Loc); else { // Complex address entry. 
if (Loc.getOffset()) { DwarfExpr.AddMachineRegIndirect(Loc.getReg(), Loc.getOffset()); - DwarfExpr.AddExpression(Expr, PieceOffsetInBits); + DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end(), + PieceOffsetInBits); } else DwarfExpr.AddMachineRegExpression(Expr, Loc.getReg(), PieceOffsetInBits); - if (DV.isIndirect()) - DwarfExpr.EmitOp(dwarf::DW_OP_deref); } } // else ... ignore constant fp. There is not any good way to @@ -1744,61 +1506,95 @@ void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer, // FIXME: ^ } -void DwarfDebug::emitDebugLocEntryLocation(const DebugLocEntry &Entry) { - Asm->OutStreamer.AddComment("Loc expr size"); - MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); - MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol(); - Asm->EmitLabelDifference(end, begin, 2); - Asm->OutStreamer.EmitLabel(begin); +void DebugLocEntry::finalize(const AsmPrinter &AP, DebugLocStream &Locs, + const DIBasicType *BT) { + Locs.startEntry(Begin, End); + BufferByteStreamer Streamer = Locs.getStreamer(); + const DebugLocEntry::Value &Value = Values[0]; + if (Value.isBitPiece()) { + // Emit all pieces that belong to the same variable and range. + assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) { + return P.isBitPiece(); + }) && "all values are expected to be pieces"); + assert(std::is_sorted(Values.begin(), Values.end()) && + "pieces are expected to be sorted"); + + unsigned Offset = 0; + for (auto Piece : Values) { + const DIExpression *Expr = Piece.getExpression(); + unsigned PieceOffset = Expr->getBitPieceOffset(); + unsigned PieceSize = Expr->getBitPieceSize(); + assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); + if (Offset < PieceOffset) { + // The DWARF spec seriously mandates pieces with no locations for gaps. + DebugLocDwarfExpression Expr(*AP.MF->getSubtarget().getRegisterInfo(), + AP.getDwarfDebug()->getDwarfVersion(), + Streamer); + Expr.AddOpPiece(PieceOffset-Offset, 0); + Offset += PieceOffset-Offset; + } + Offset += PieceSize; + + emitDebugLocValue(AP, BT, Streamer, Piece, PieceOffset); + } + } else { + assert(Values.size() == 1 && "only pieces may have >1 value"); + emitDebugLocValue(AP, BT, Streamer, Value, 0); + } +} + +void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) { + // Emit the size. + Asm->OutStreamer->AddComment("Loc expr size"); + Asm->EmitInt16(DebugLocs.getBytes(Entry).size()); + // Emit the entry. APByteStreamer Streamer(*Asm); emitDebugLocEntry(Streamer, Entry); - // Close the range. - Asm->OutStreamer.EmitLabel(end); } // Emit locations into the debug loc section. void DwarfDebug::emitDebugLoc() { // Start the dwarf loc section. - Asm->OutStreamer.SwitchSection( + Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLocSection()); unsigned char Size = Asm->getDataLayout().getPointerSize(); - for (const auto &DebugLoc : DotDebugLocEntries) { - Asm->OutStreamer.EmitLabel(DebugLoc.Label); - const DwarfCompileUnit *CU = DebugLoc.CU; - for (const auto &Entry : DebugLoc.List) { + for (const auto &List : DebugLocs.getLists()) { + Asm->OutStreamer->EmitLabel(List.Label); + const DwarfCompileUnit *CU = List.CU; + for (const auto &Entry : DebugLocs.getEntries(List)) { // Set up the range. This range is relative to the entry point of the // compile unit. This is a hard coded 0 for low_pc when we're emitting // ranges, or the DW_AT_low_pc on the compile unit otherwise. 
if (auto *Base = CU->getBaseAddress()) { - Asm->EmitLabelDifference(Entry.getBeginSym(), Base, Size); - Asm->EmitLabelDifference(Entry.getEndSym(), Base, Size); + Asm->EmitLabelDifference(Entry.BeginSym, Base, Size); + Asm->EmitLabelDifference(Entry.EndSym, Base, Size); } else { - Asm->OutStreamer.EmitSymbolValue(Entry.getBeginSym(), Size); - Asm->OutStreamer.EmitSymbolValue(Entry.getEndSym(), Size); + Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size); + Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size); } emitDebugLocEntryLocation(Entry); } - Asm->OutStreamer.EmitIntValue(0, Size); - Asm->OutStreamer.EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); } } void DwarfDebug::emitDebugLocDWO() { - Asm->OutStreamer.SwitchSection( + Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLocDWOSection()); - for (const auto &DebugLoc : DotDebugLocEntries) { - Asm->OutStreamer.EmitLabel(DebugLoc.Label); - for (const auto &Entry : DebugLoc.List) { + for (const auto &List : DebugLocs.getLists()) { + Asm->OutStreamer->EmitLabel(List.Label); + for (const auto &Entry : DebugLocs.getEntries(List)) { // Just always use start_length for now - at least that's one address // rather than two. We could get fancier and try to, say, reuse an // address we know we've emitted elsewhere (the start of the function? // The start of the CU or CU subrange that encloses this range?) Asm->EmitInt8(dwarf::DW_LLE_start_length_entry); - unsigned idx = AddrPool.getIndex(Entry.getBeginSym()); + unsigned idx = AddrPool.getIndex(Entry.BeginSym); Asm->EmitULEB128(idx); - Asm->EmitLabelDifference(Entry.getEndSym(), Entry.getBeginSym(), 4); + Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4); emitDebugLocEntryLocation(Entry); } @@ -1813,36 +1609,62 @@ struct ArangeSpan { // Emit a debug aranges section, containing a CU lookup for any // address we can tie back to a CU. void DwarfDebug::emitDebugARanges() { - // Start the dwarf aranges section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfARangesSection()); + // Provides a unique id per text section. + MapVector<MCSection *, SmallVector<SymbolCU, 8>> SectionMap; - typedef DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> SpansType; + // Filter labels by section. + for (const SymbolCU &SCU : ArangeLabels) { + if (SCU.Sym->isInSection()) { + // Make a note of this symbol and its section. + MCSection *Section = &SCU.Sym->getSection(); + if (!Section->getKind().isMetadata()) + SectionMap[Section].push_back(SCU); + } else { + // Some symbols (e.g. common/bss on mach-o) can have no section but still + // appear in the output. This sucks as we rely on sections to build + // arange spans. We can do it without, but it's icky. + SectionMap[nullptr].push_back(SCU); + } + } + + // Add terminating symbols for each section. + for (const auto &I : SectionMap) { + MCSection *Section = I.first; + MCSymbol *Sym = nullptr; - SpansType Spans; + if (Section) + Sym = Asm->OutStreamer->endSection(Section); - // Build a list of sections used. - std::vector<const MCSection *> Sections; - for (const auto &it : SectionMap) { - const MCSection *Section = it.first; - Sections.push_back(Section); + // Insert a final terminator. + SectionMap[Section].push_back(SymbolCU(nullptr, Sym)); } - // Sort the sections into order. - // This is only done to ensure consistent output order across different runs.
- std::sort(Sections.begin(), Sections.end(), SectionSort); + DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans; - // Build a set of address spans, sorted by CU. - for (const MCSection *Section : Sections) { - SmallVector<SymbolCU, 8> &List = SectionMap[Section]; + for (auto &I : SectionMap) { + const MCSection *Section = I.first; + SmallVector<SymbolCU, 8> &List = I.second; if (List.size() < 2) continue; + // If we have no section (e.g. common), just write out + // individual spans for each symbol. + if (!Section) { + for (const SymbolCU &Cur : List) { + ArangeSpan Span; + Span.Start = Cur.Sym; + Span.End = nullptr; + if (Cur.CU) + Spans[Cur.CU].push_back(Span); + } + continue; + } + // Sort the symbols by offset within the section. std::sort(List.begin(), List.end(), [&](const SymbolCU &A, const SymbolCU &B) { - unsigned IA = A.Sym ? Asm->OutStreamer.GetSymbolOrder(A.Sym) : 0; - unsigned IB = B.Sym ? Asm->OutStreamer.GetSymbolOrder(B.Sym) : 0; + unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0; + unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0; // Symbols with no order assigned should be placed at the end. // (e.g. section end labels) @@ -1853,35 +1675,27 @@ void DwarfDebug::emitDebugARanges() { return IA < IB; }); - // If we have no section (e.g. common), just write out - // individual spans for each symbol. - if (!Section) { - for (const SymbolCU &Cur : List) { + // Build spans between each label. + const MCSymbol *StartSym = List[0].Sym; + for (size_t n = 1, e = List.size(); n < e; n++) { + const SymbolCU &Prev = List[n - 1]; + const SymbolCU &Cur = List[n]; + + // Try and build the longest span we can within the same CU. + if (Cur.CU != Prev.CU) { ArangeSpan Span; - Span.Start = Cur.Sym; - Span.End = nullptr; - if (Cur.CU) - Spans[Cur.CU].push_back(Span); - } - } else { - // Build spans between each label. - const MCSymbol *StartSym = List[0].Sym; - for (size_t n = 1, e = List.size(); n < e; n++) { - const SymbolCU &Prev = List[n - 1]; - const SymbolCU &Cur = List[n]; - - // Try and build the longest span we can within the same CU. - if (Cur.CU != Prev.CU) { - ArangeSpan Span; - Span.Start = StartSym; - Span.End = Cur.Sym; - Spans[Prev.CU].push_back(Span); - StartSym = Cur.Sym; - } + Span.Start = StartSym; + Span.End = Cur.Sym; + Spans[Prev.CU].push_back(Span); + StartSym = Cur.Sym; } } } + // Start the dwarf aranges section. + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfARangesSection()); + unsigned PtrSize = Asm->getDataLayout().getPointerSize(); // Build a list of CUs used. @@ -1921,18 +1735,18 @@ void DwarfDebug::emitDebugARanges() { ContentSize += (List.size() + 1) * TupleSize; // For each compile unit, write the list of spans it covers. 
- Asm->OutStreamer.AddComment("Length of ARange Set"); + Asm->OutStreamer->AddComment("Length of ARange Set"); Asm->EmitInt32(ContentSize); - Asm->OutStreamer.AddComment("DWARF Arange version number"); + Asm->OutStreamer->AddComment("DWARF Arange version number"); Asm->EmitInt16(dwarf::DW_ARANGES_VERSION); - Asm->OutStreamer.AddComment("Offset Into Debug Info Section"); - Asm->EmitSectionOffset(CU->getLabelBegin(), CU->getSectionSym()); - Asm->OutStreamer.AddComment("Address Size (in bytes)"); + Asm->OutStreamer->AddComment("Offset Into Debug Info Section"); + Asm->emitSectionOffset(CU->getLabelBegin()); + Asm->OutStreamer->AddComment("Address Size (in bytes)"); Asm->EmitInt8(PtrSize); - Asm->OutStreamer.AddComment("Segment Size (in bytes)"); + Asm->OutStreamer->AddComment("Segment Size (in bytes)"); Asm->EmitInt8(0); - Asm->OutStreamer.EmitFill(Padding, 0xff); + Asm->OutStreamer->EmitFill(Padding, 0xff); for (const ArangeSpan &Span : List) { Asm->EmitLabelReference(Span.Start, PtrSize); @@ -1947,20 +1761,20 @@ void DwarfDebug::emitDebugARanges() { if (Size == 0) Size = 1; - Asm->OutStreamer.EmitIntValue(Size, PtrSize); + Asm->OutStreamer->EmitIntValue(Size, PtrSize); } } - Asm->OutStreamer.AddComment("ARange terminator"); - Asm->OutStreamer.EmitIntValue(0, PtrSize); - Asm->OutStreamer.EmitIntValue(0, PtrSize); + Asm->OutStreamer->AddComment("ARange terminator"); + Asm->OutStreamer->EmitIntValue(0, PtrSize); + Asm->OutStreamer->EmitIntValue(0, PtrSize); } } // Emit visible names into a debug ranges section. void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. - Asm->OutStreamer.SwitchSection( + Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfRangesSection()); // Size for our labels. @@ -1976,7 +1790,7 @@ void DwarfDebug::emitDebugRanges() { // Iterate over the misc ranges for the compile units in the module. for (const RangeSpanList &List : TheCU->getRangeLists()) { // Emit our symbol so we can find the beginning of the range. - Asm->OutStreamer.EmitLabel(List.getSym()); + Asm->OutStreamer->EmitLabel(List.getSym()); for (const RangeSpan &Range : List.getRanges()) { const MCSymbol *Begin = Range.getStart(); @@ -1987,14 +1801,14 @@ void DwarfDebug::emitDebugRanges() { Asm->EmitLabelDifference(Begin, Base, Size); Asm->EmitLabelDifference(End, Base, Size); } else { - Asm->OutStreamer.EmitSymbolValue(Begin, Size); - Asm->OutStreamer.EmitSymbolValue(End, Size); + Asm->OutStreamer->EmitSymbolValue(Begin, Size); + Asm->OutStreamer->EmitSymbolValue(End, Size); } } // And terminate the list with two 0 values. 
- Asm->OutStreamer.EmitIntValue(0, Size); - Asm->OutStreamer.EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); } } } @@ -2004,7 +1818,7 @@ void DwarfDebug::emitDebugRanges() { void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, std::unique_ptr<DwarfUnit> NewU) { NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name, - U.getCUNode().getSplitDebugFilename()); + U.getCUNode()->getSplitDebugFilename()); if (!CompilationDir.empty()) NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); @@ -2022,10 +1836,9 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { auto OwnedUnit = make_unique<DwarfCompileUnit>( CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); DwarfCompileUnit &NewCU = *OwnedUnit; - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(), - DwarfInfoSectionSym); + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection()); - NewCU.initStmtList(DwarfLineSectionSym); + NewCU.initStmtList(); initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit)); @@ -2036,9 +1849,8 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { // compile units that would normally be in debug_info. void DwarfDebug::emitDebugInfoDWO() { assert(useSplitDwarf() && "No split dwarf debug info?"); - // Don't pass an abbrev symbol, using a constant zero instead so as not to - // emit relocations into the dwo file. - InfoHolder.emitUnits(/* AbbrevSymbol */ nullptr); + // Don't emit relocations into the dwo file. + InfoHolder.emitUnits(/* UseOffsets */ true); } // Emit the .debug_abbrev.dwo section for separated dwarf. This contains the @@ -2050,9 +1862,9 @@ void DwarfDebug::emitDebugAbbrevDWO() { void DwarfDebug::emitDebugLineDWO() { assert(useSplitDwarf() && "No split dwarf?"); - Asm->OutStreamer.SwitchSection( + Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLineDWOSection()); - SplitTypeUnitFileTable.Emit(Asm->OutStreamer); + SplitTypeUnitFileTable.Emit(*Asm->OutStreamer); } // Emit the .debug_str.dwo section for separated dwarf. This contains the @@ -2060,8 +1872,7 @@ void DwarfDebug::emitDebugLineDWO() { // sections. void DwarfDebug::emitDebugStrDWO() { assert(useSplitDwarf() && "No split dwarf?"); - const MCSection *OffSec = - Asm->getObjFileLowering().getDwarfStrOffDWOSection(); + MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection(); InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(), OffSec); } @@ -2070,7 +1881,7 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) { if (!useSplitDwarf()) return nullptr; if (SingleCU) - SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode().getDirectory()); + SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode()->getDirectory()); return &SplitTypeUnitFileTable; } @@ -2082,12 +1893,12 @@ static uint64_t makeTypeSignature(StringRef Identifier) { // appropriately. MD5::MD5Result Result; Hash.final(Result); - return *reinterpret_cast<support::ulittle64_t *>(Result + 8); + return support::endian::read64le(Result + 8); } void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, DIE &RefDie, - DICompositeType CTy) { + const DICompositeType *CTy) { // Fast path if we're building some type units and one has already used the // address pool we know we're going to throw away all this work anyway, so // don't bother building dependent types. 
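For reference, makeTypeSignature above implements the DWARF4 type-unit signature scheme: the type's unique identifier string is hashed with MD5 and the trailing eight bytes of the 16-byte digest become the signature, read little-endian (support::endian::read64le replacing the old ulittle64_t reinterpret_cast). A self-contained restatement under that reading; the function name is illustrative, and it assumes the byte-array MD5Result layout this revision of LLVM uses:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MD5.h"
#include <cstdint>
using namespace llvm;

// Illustrative sketch: the type signature is the last 8 bytes of
// MD5(Identifier), interpreted as a little-endian 64-bit value.
static uint64_t typeSignatureSketch(StringRef Identifier) {
  MD5 Hash;
  Hash.update(Identifier);
  MD5::MD5Result Result; // 16-byte digest
  Hash.final(Result);
  return support::endian::read64le(Result + 8); // bytes 8..15
}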
@@ -2146,7 +1957,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, // This is inefficient because all the dependent types will be rebuilt // from scratch, including building them in type units, discovering that // they depend on addresses, throwing them out and rebuilding them. - CU.constructTypeDIE(RefDie, CTy); + CU.constructTypeDIE(RefDie, cast<DICompositeType>(CTy)); return; } @@ -2165,27 +1976,23 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) { if (!useDwarfAccelTables()) return; - AccelNames.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), - &Die); + AccelNames.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); } void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) { if (!useDwarfAccelTables()) return; - AccelObjC.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), - &Die); + AccelObjC.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); } void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) { if (!useDwarfAccelTables()) return; - AccelNamespace.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), - &Die); + AccelNamespace.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); } void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) { if (!useDwarfAccelTables()) return; - AccelTypes.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), - &Die); + AccelTypes.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index a1a9426..700f736 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -16,11 +16,11 @@ #include "AsmPrinterHandler.h" #include "DbgValueHistoryCalculator.h" -#include "DebugLocEntry.h" -#include "DebugLocList.h" +#include "DebugLocStream.h" #include "DwarfAccelTable.h" #include "DwarfFile.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -41,6 +41,7 @@ class AsmPrinter; class ByteStreamer; class ConstantInt; class ConstantFP; +class DebugLocEntry; class DwarfCompileUnit; class DwarfDebug; class DwarfTypeUnit; @@ -67,81 +68,111 @@ public: //===----------------------------------------------------------------------===// /// \brief This class is used to track local variable information. +/// +/// - Variables whose location changes over time have a DebugLocListIndex and +/// the other fields are not used. +/// +/// - Variables that are described by multiple MMI table entries have multiple +/// expressions and frame indices. class DbgVariable { - DIVariable Var; // Variable Descriptor. - DIExpression Expr; // Complex address location expression. - DIE *TheDIE; // Variable DIE. - unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. - const MachineInstr *MInsn; // DBG_VALUE instruction of the variable. - int FrameIndex; + const DILocalVariable *Var; /// Variable Descriptor. + const DILocation *IA; /// Inlined at location. + SmallVector<const DIExpression *, 1> + Expr; /// Complex address location expression. + DIE *TheDIE; /// Variable DIE. + unsigned DebugLocListIndex; /// Offset in DebugLocs. + const MachineInstr *MInsn; /// DBG_VALUE instruction of the variable. + SmallVector<int, 1> FrameIndex; /// Frame index of the variable. 
DwarfDebug *DD; public: - /// Construct a DbgVariable from a DIVariable. - DbgVariable(DIVariable V, DIExpression E, DwarfDebug *DD) - : Var(V), Expr(E), TheDIE(nullptr), DotDebugLocOffset(~0U), - MInsn(nullptr), FrameIndex(~0), DD(DD) { - assert(Var.Verify() && Expr.Verify()); + /// Construct a DbgVariable from a variable. + DbgVariable(const DILocalVariable *V, const DILocation *IA, + const DIExpression *E, DwarfDebug *DD, int FI = ~0) + : Var(V), IA(IA), Expr(1, E), TheDIE(nullptr), DebugLocListIndex(~0U), + MInsn(nullptr), DD(DD) { + FrameIndex.push_back(FI); + assert(!E || E->isValid()); } /// Construct a DbgVariable from a DEBUG_VALUE. /// AbstractVar may be NULL. DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD) - : Var(DbgValue->getDebugVariable()), Expr(DbgValue->getDebugExpression()), - TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(DbgValue), - FrameIndex(~0), DD(DD) {} + : Var(DbgValue->getDebugVariable()), + IA(DbgValue->getDebugLoc()->getInlinedAt()), + Expr(1, DbgValue->getDebugExpression()), TheDIE(nullptr), + DebugLocListIndex(~0U), MInsn(DbgValue), DD(DD) { + FrameIndex.push_back(~0); + } // Accessors. - DIVariable getVariable() const { return Var; } - DIExpression getExpression() const { return Expr; } + const DILocalVariable *getVariable() const { return Var; } + const DILocation *getInlinedAt() const { return IA; } + const ArrayRef<const DIExpression *> getExpression() const { return Expr; } void setDIE(DIE &D) { TheDIE = &D; } DIE *getDIE() const { return TheDIE; } - void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } - unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } - StringRef getName() const { return Var.getName(); } + void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; } + unsigned getDebugLocListIndex() const { return DebugLocListIndex; } + StringRef getName() const { return Var->getName(); } const MachineInstr *getMInsn() const { return MInsn; } - int getFrameIndex() const { return FrameIndex; } - void setFrameIndex(int FI) { FrameIndex = FI; } + const ArrayRef<int> getFrameIndex() const { return FrameIndex; } + + void addMMIEntry(const DbgVariable &V) { + assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry"); + assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry"); + assert(V.Var == Var && "conflicting variable"); + assert(V.IA == IA && "conflicting inlined-at location"); + + if (V.getFrameIndex().back() != ~0) { + auto E = V.getExpression(); + auto FI = V.getFrameIndex(); + Expr.append(E.begin(), E.end()); + FrameIndex.append(FI.begin(), FI.end()); + } + assert(Expr.size() > 1 ? std::all_of(Expr.begin(), Expr.end(), + [](const DIExpression *E) { + return E->isBitPiece(); + }) + : (true && "conflicting locations for variable")); + } + // Translate tag to proper Dwarf tag. dwarf::Tag getTag() const { - if (Var.getTag() == dwarf::DW_TAG_arg_variable) + if (Var->getTag() == dwarf::DW_TAG_arg_variable) return dwarf::DW_TAG_formal_parameter; return dwarf::DW_TAG_variable; } /// \brief Return true if DbgVariable is artificial. 
bool isArtificial() const { - if (Var.isArtificial()) + if (Var->isArtificial()) return true; - if (getType().isArtificial()) + if (getType()->isArtificial()) return true; return false; } bool isObjectPointer() const { - if (Var.isObjectPointer()) + if (Var->isObjectPointer()) return true; - if (getType().isObjectPointer()) + if (getType()->isObjectPointer()) return true; return false; } bool variableHasComplexAddress() const { - assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Expr.getNumElements() > 0; + assert(Var && "Invalid complex DbgVariable!"); + assert(Expr.size() == 1 && + "variableHasComplexAddress() invoked on multi-FI variable"); + return Expr.back()->getNumElements() > 0; } bool isBlockByrefVariable() const; - unsigned getNumAddrElements() const { - assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Expr.getNumElements(); - } - uint64_t getAddrElement(unsigned i) const { return Expr.getElement(i); } - DIType getType() const; + const DIType *getType() const; private: /// resolve - Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. - template <typename T> T resolve(DIRef<T> Ref) const; + template <typename T> T *resolve(TypedDINodeRef<T> Ref) const; }; @@ -178,10 +209,6 @@ class DwarfDebug : public AsmPrinterHandler { // Size of each symbol emitted (for those symbols that have a specific size). DenseMap<const MCSymbol *, uint64_t> SymSize; - // Provides a unique id per text section. - typedef DenseMap<const MCSection *, SmallVector<SymbolCU, 8> > SectionMapType; - SectionMapType SectionMap; - LexicalScopes LScopes; // Collection of abstract variables. @@ -190,7 +217,7 @@ class DwarfDebug : public AsmPrinterHandler { // Collection of DebugLocEntry. Stored in a linked list so that DIELocLists // can refer to them in spite of insertions into this list. - SmallVector<DebugLocList, 4> DotDebugLocEntries; + DebugLocStream DebugLocs; // This is a collection of subprogram MDNodes that are processed to // create DIEs. @@ -224,25 +251,10 @@ class DwarfDebug : public AsmPrinterHandler { // If nonnull, stores the CU in which the previous subprogram was contained. const DwarfCompileUnit *PrevCU; - // Section Symbols: these are assembler temporary labels that are emitted at - // the beginning of each supported dwarf section. These are used to form - // section offsets and are created by EmitSectionLabels. - MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; - MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; - MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym; - MCSymbol *FunctionBeginSym, *FunctionEndSym; - MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym; - MCSymbol *DwarfTypesDWOSectionSym; - MCSymbol *DwarfStrDWOSectionSym; - MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym; - // As an optimization, there is no need to emit an entry in the directory // table for the same directory as DW_AT_comp_dir. StringRef CompilationDir; - // Counter for assigning globally unique IDs for ranges. - unsigned GlobalRangeCount; - // Holder for the file specific debug information. DwarfFile InfoHolder; @@ -258,7 +270,9 @@ class DwarfDebug : public AsmPrinterHandler { // them. 
DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits; - SmallVector<std::pair<std::unique_ptr<DwarfTypeUnit>, DICompositeType>, 1> TypeUnitsUnderConstruction; + SmallVector< + std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1> + TypeUnitsUnderConstruction; // Whether to emit the pubnames/pubtypes sections. bool HasDwarfPubSections; @@ -270,6 +284,9 @@ class DwarfDebug : public AsmPrinterHandler { // text. bool UsedNonDefaultText; + // Whether to use the GNU TLS opcode (instead of the standard opcode). + bool UseGNUTLSOpcode; + // Version of dwarf we're emitting. unsigned DwarfVersion; @@ -298,6 +315,7 @@ class DwarfDebug : public AsmPrinterHandler { // True iff there are multiple CUs in this module. bool SingleCU; bool IsDarwin; + bool IsPS4; AddressPool AddrPool; @@ -306,7 +324,7 @@ class DwarfDebug : public AsmPrinterHandler { DwarfAccelTable AccelNamespace; DwarfAccelTable AccelTypes; - DenseMap<const Function *, DISubprogram> FunctionDIs; + DenseMap<const Function *, DISubprogram *> FunctionDIs; MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); @@ -314,22 +332,21 @@ class DwarfDebug : public AsmPrinterHandler { return InfoHolder.getUnits(); } + typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; + /// \brief Find abstract variable associated with Var. - DbgVariable *getExistingAbstractVariable(const DIVariable &DV, - DIVariable &Cleansed); - DbgVariable *getExistingAbstractVariable(const DIVariable &DV); - void createAbstractVariable(const DIVariable &DV, LexicalScope *Scope); - void ensureAbstractVariableIsCreated(const DIVariable &Var, + DbgVariable *getExistingAbstractVariable(InlinedVariable IV, + const DILocalVariable *&Cleansed); + DbgVariable *getExistingAbstractVariable(InlinedVariable IV); + void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope); + void ensureAbstractVariableIsCreated(InlinedVariable Var, const MDNode *Scope); - void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var, + void ensureAbstractVariableIsCreatedIfScoped(InlinedVariable Var, const MDNode *Scope); /// \brief Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); - /// \brief Emit initial Dwarf sections with a label at the start of each one. - void emitSectionLabels(); - /// \brief Compute the size and offset of a DIE given an incoming Offset. unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); @@ -347,23 +364,15 @@ class DwarfDebug : public AsmPrinterHandler { /// processed. void finalizeModuleInfo(); - /// \brief Emit labels to close any remaining sections that have been left - /// open. - void endSections(); - /// \brief Emit the debug info section. void emitDebugInfo(); /// \brief Emit the abbreviation section. void emitAbbreviations(); - /// \brief Emit the last address of the section and the end of - /// the line matrix. - void emitEndOfLineMatrix(unsigned SectionEnd); - /// \brief Emit a specified accelerator table. - void emitAccel(DwarfAccelTable &Accel, const MCSection *Section, - StringRef TableName, StringRef SymName); + void emitAccel(DwarfAccelTable &Accel, MCSection *Section, + StringRef TableName); /// \brief Emit visible names into a hashed accelerator table section. 
void emitAccelNames(); @@ -391,7 +400,7 @@ class DwarfDebug : public AsmPrinterHandler { void emitDebugPubTypes(bool GnuStyle = false); void emitDebugPubSection( - bool GnuStyle, const MCSection *PSec, StringRef Name, + bool GnuStyle, MCSection *PSec, StringRef Name, const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const); /// \brief Emit visible names into a debug str section. @@ -444,11 +453,11 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Create new DwarfCompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. - DwarfCompileUnit &constructDwarfCompileUnit(DICompileUnit DIUnit); + DwarfCompileUnit &constructDwarfCompileUnit(const DICompileUnit *DIUnit); /// \brief Construct imported_module or imported_declaration DIE. void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, - const MDNode *N); + const DIImportedEntity *N); /// \brief Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the @@ -461,8 +470,8 @@ class DwarfDebug : public AsmPrinterHandler { void identifyScopeMarkers(); /// \brief Populate LexicalScope entries with variables' info. - void collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, - SmallPtrSetImpl<const MDNode *> &ProcessedVars); + void collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, + DenseSet<InlinedVariable> &ProcessedVars); /// \brief Build the location list for all DBG_VALUEs in the /// function that describe the same variable. @@ -471,7 +480,7 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Collect variable information from the side table maintained /// by MMI. - void collectVariableInfoFromMMITable(SmallPtrSetImpl<const MDNode *> &P); + void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P); /// \brief Ensure that a label will be emitted before MI. void requestLabelBeforeInsn(const MachineInstr *MI) { @@ -513,7 +522,7 @@ public: /// \brief Add a DIE to the set of types that we're going to pull into /// type units. void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, - DIE &Die, DICompositeType CTy); + DIE &Die, const DICompositeType *CTy); /// \brief Add a label so that arange data can be generated for it. void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } @@ -524,8 +533,9 @@ public: SymSize[Sym] = Size; } - /// \brief Recursively Emits a debug information entry. - void emitDIE(DIE &Die); + /// \brief Returns whether to use DW_OP_GNU_push_tls_address, instead of the + /// standard DW_OP_form_tls_address opcode + bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; } // Experimental DWARF5 features. @@ -540,42 +550,23 @@ public: /// Returns the Dwarf Version. unsigned getDwarfVersion() const { return DwarfVersion; } - /// Returns the section symbol for the .debug_loc section. - MCSymbol *getDebugLocSym() const { return DwarfDebugLocSectionSym; } - - /// Returns the section symbol for the .debug_str section. - MCSymbol *getDebugStrSym() const { return DwarfStrSectionSym; } - - /// Returns the section symbol for the .debug_ranges section. - MCSymbol *getRangeSectionSym() const { return DwarfDebugRangeSectionSym; } - /// Returns the previous CU that was being updated const DwarfCompileUnit *getPrevCU() const { return PrevCU; } void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; } /// Returns the entries for the .debug_loc section. 
- const SmallVectorImpl<DebugLocList> & - getDebugLocEntries() const { - return DotDebugLocEntries; - } + const DebugLocStream &getDebugLocs() const { return DebugLocs; } /// \brief Emit an entry for the debug loc section. This can be used to /// handle an entry that's going to be emitted into the debug loc section. - void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry); - /// \brief emit a single value for the debug loc section. - void emitDebugLocValue(ByteStreamer &Streamer, - const DebugLocEntry::Value &Value, - unsigned PieceOffsetInBits = 0); - /// Emits an optimal (=sorted) sequence of DW_OP_pieces. - void emitLocPieces(ByteStreamer &Streamer, - const DITypeIdentifierMap &Map, - ArrayRef<DebugLocEntry::Value> Values); + void emitDebugLocEntry(ByteStreamer &Streamer, + const DebugLocStream::Entry &Entry); /// Emit the location for a debug loc entry, including the size header. - void emitDebugLocEntryLocation(const DebugLocEntry &Entry); + void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry); /// Find the MDNode for the given reference. - template <typename T> T resolve(DIRef<T> Ref) const { + template <typename T> T *resolve(TypedDINodeRef<T> Ref) const { return Ref.resolve(TypeIdentifierMap); } @@ -592,7 +583,7 @@ public: /// or another context nested inside a subprogram. bool isSubprogramContext(const MDNode *Context); - void addSubprogramNames(DISubprogram SP, DIE &Die); + void addSubprogramNames(const DISubprogram *SP, DIE &Die); AddressPool &getAddressPool() { return AddrPool; } @@ -605,8 +596,6 @@ public: void addAccelType(StringRef Name, const DIE &Die, char Flags); const MachineFunction *getCurrentFunction() const { return CurFn; } - const MCSymbol *getFunctionBeginSym() const { return FunctionBeginSym; } - const MCSymbol *getFunctionEndSym() const { return FunctionEndSym; } iterator_range<ImportedEntityMap::const_iterator> findImportedEntitiesForScope(const MDNode *Scope) const { @@ -626,12 +615,6 @@ public: /// \brief Return Label immediately following the instruction. MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - // FIXME: Consider rolling ranges up into DwarfDebug since we use a single - // range_base anyway, so there's no need to keep them as separate per-CU range - // lists. (though one day we might end up with a range.dwo section, in which - // case it'd go to DwarfFile) - unsigned getNextRangeNumber() { return GlobalRangeCount++; } - // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit. SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index e8867c0a..a4fd36f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -21,17 +21,24 @@ namespace llvm { class MachineFunction; class ARMTargetStreamer; -class DwarfCFIException : public EHStreamer { - /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality - /// should be emitted. +class DwarfCFIExceptionBase : public EHStreamer { +protected: + DwarfCFIExceptionBase(AsmPrinter *A); + + /// Per-function flag to indicate if frame CFI info should be emitted. + bool shouldEmitCFI; + + void markFunctionEnd() override; +}; + +class DwarfCFIException : public DwarfCFIExceptionBase { + /// Per-function flag to indicate if .cfi_personality should be emitted. 
bool shouldEmitPersonality; - /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda - /// should be emitted. + /// Per-function flag to indicate if .cfi_lsda should be emitted. bool shouldEmitLSDA; - /// shouldEmitMoves - Per-function flag to indicate if frame moves info - /// should be emitted. + /// Per-function flag to indicate if frame moves info should be emitted. bool shouldEmitMoves; AsmPrinter::CFIMoveType moveTypeModule; @@ -41,44 +48,38 @@ public: // Main entry points. // DwarfCFIException(AsmPrinter *A); - virtual ~DwarfCFIException(); + ~DwarfCFIException() override; - /// endModule - Emit all exception information that should come after the - /// content. + /// Emit all exception information that should come after the content. void endModule() override; - /// beginFunction - Gather pre-function exception information. Assumes being - /// emitted immediately after the function entry point. + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. void beginFunction(const MachineFunction *MF) override; - /// endFunction - Gather and emit post-function exception information. + /// Gather and emit post-function exception information. void endFunction(const MachineFunction *) override; }; -class ARMException : public EHStreamer { +class ARMException : public DwarfCFIExceptionBase { void emitTypeInfos(unsigned TTypeEncoding) override; ARMTargetStreamer &getTargetStreamer(); - /// shouldEmitCFI - Per-function flag to indicate if frame CFI info - /// should be emitted. - bool shouldEmitCFI; - public: //===--------------------------------------------------------------------===// // Main entry points. // ARMException(AsmPrinter *A); - virtual ~ARMException(); + ~ARMException() override; - /// endModule - Emit all exception information that should come after the - /// content. + /// Emit all exception information that should come after the content. void endModule() override; - /// beginFunction - Gather pre-function exception information. Assumes being - /// emitted immediately after the function entry point. + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. void beginFunction(const MachineFunction *MF) override; - /// endFunction - Gather and emit post-function exception information. + /// Gather and emit post-function exception information. 
void endFunction(const MachineFunction *) override; }; } // End of namespace llvm diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 8e85eff..a2799b8 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -22,14 +22,6 @@ using namespace llvm; -const TargetRegisterInfo *DwarfExpression::getTRI() const { - return AP.TM.getSubtargetImpl()->getRegisterInfo(); -} - -unsigned DwarfExpression::getDwarfVersion() const { - return AP.getDwarfDebug()->getDwarfVersion(); -} - void DwarfExpression::AddReg(int DwarfReg, const char *Comment) { assert(DwarfReg >= 0 && "invalid negative dwarf register number"); if (DwarfReg < 32) { @@ -74,25 +66,28 @@ void DwarfExpression::AddShr(unsigned ShiftBy) { } bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) { - int DwarfReg = getTRI()->getDwarfRegNum(MachineReg, false); - if (DwarfReg < 0) - return false; - if (isFrameRegister(MachineReg)) { // If variable offset is based in frame register then use fbreg. EmitOp(dwarf::DW_OP_fbreg); EmitSigned(Offset); - } else { - AddRegIndirect(DwarfReg, Offset); + return true; } + + int DwarfReg = TRI.getDwarfRegNum(MachineReg, false); + if (DwarfReg < 0) + return false; + + AddRegIndirect(DwarfReg, Offset); return true; } bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, unsigned PieceSizeInBits, unsigned PieceOffsetInBits) { - const TargetRegisterInfo *TRI = getTRI(); - int Reg = TRI->getDwarfRegNum(MachineReg, false); + if (!TRI.isPhysicalRegister(MachineReg)) + return false; + + int Reg = TRI.getDwarfRegNum(MachineReg, false); // If this is a valid register number, emit it. if (Reg >= 0) { @@ -104,12 +99,12 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, // Walk up the super-register chain until we find a valid number. // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0. - for (MCSuperRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) { - Reg = TRI->getDwarfRegNum(*SR, false); + for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) { + Reg = TRI.getDwarfRegNum(*SR, false); if (Reg >= 0) { - unsigned Idx = TRI->getSubRegIndex(*SR, MachineReg); - unsigned Size = TRI->getSubRegIdxSize(Idx); - unsigned RegOffset = TRI->getSubRegIdxOffset(Idx); + unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg); + unsigned Size = TRI.getSubRegIdxSize(Idx); + unsigned RegOffset = TRI.getSubRegIdxOffset(Idx); AddReg(Reg, "super-register"); if (PieceOffsetInBits == RegOffset) { AddOpPiece(Size, RegOffset); @@ -133,15 +128,15 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, // efficient DW_OP_piece. unsigned CurPos = PieceOffsetInBits; // The size of the register in bits, assuming 8 bits per byte. - unsigned RegSize = TRI->getMinimalPhysRegClass(MachineReg)->getSize() * 8; + unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8; // Keep track of the bits in the register we already emitted, so we // can avoid emitting redundant aliasing subregs. 
SmallBitVector Coverage(RegSize, false); - for (MCSubRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) { - unsigned Idx = TRI->getSubRegIndex(MachineReg, *SR); - unsigned Size = TRI->getSubRegIdxSize(Idx); - unsigned Offset = TRI->getSubRegIdxOffset(Idx); - Reg = TRI->getDwarfRegNum(*SR, false); + for (MCSubRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) { + unsigned Idx = TRI.getSubRegIndex(MachineReg, *SR); + unsigned Size = TRI.getSubRegIdxSize(Idx); + unsigned Offset = TRI.getSubRegIdxOffset(Idx); + Reg = TRI.getDwarfRegNum(*SR, false); // Intersection between the bits we already emitted and the bits // covered by this subregister. @@ -177,7 +172,7 @@ void DwarfExpression::AddSignedConstant(int Value) { // value, so the producers and consumers started to rely on heuristics // to disambiguate the value vs. location status of the expression. // See PR21176 for more details. - if (getDwarfVersion() >= 4) + if (DwarfVersion >= 4) EmitOp(dwarf::DW_OP_stack_value); } @@ -185,7 +180,7 @@ void DwarfExpression::AddUnsignedConstant(unsigned Value) { EmitOp(dwarf::DW_OP_constu); EmitUnsigned(Value); // cf. comment in DwarfExpression::AddSignedConstant(). - if (getDwarfVersion() >= 4) + if (DwarfVersion >= 4) EmitOp(dwarf::DW_OP_stack_value); } @@ -197,64 +192,74 @@ static unsigned getOffsetOrZero(unsigned OffsetInBits, return OffsetInBits; } -bool DwarfExpression::AddMachineRegExpression(DIExpression Expr, +bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr, unsigned MachineReg, unsigned PieceOffsetInBits) { - unsigned N = Expr.getNumElements(); - unsigned I = 0; - bool ValidReg = false; + auto I = Expr->expr_op_begin(); + auto E = Expr->expr_op_end(); + if (I == E) + return AddMachineRegPiece(MachineReg); + // Pattern-match combinations for which more efficient representations exist // first. - if (N >= 3 && Expr.getElement(0) == dwarf::DW_OP_piece) { - unsigned SizeOfByte = 8; - unsigned OffsetInBits = Expr.getElement(1) * SizeOfByte; - unsigned SizeInBits = Expr.getElement(2) * SizeOfByte; - ValidReg = - AddMachineRegPiece(MachineReg, SizeInBits, - getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); - I = 3; - } else if (N >= 3 && Expr.getElement(0) == dwarf::DW_OP_plus && - Expr.getElement(2) == dwarf::DW_OP_deref) { + bool ValidReg = false; + switch (I->getOp()) { + case dwarf::DW_OP_bit_piece: { + unsigned OffsetInBits = I->getArg(0); + unsigned SizeInBits = I->getArg(1); + // Piece always comes at the end of the expression. + return AddMachineRegPiece(MachineReg, SizeInBits, + getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); + } + case dwarf::DW_OP_plus: { // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset]. - unsigned Offset = Expr.getElement(1); - ValidReg = AddMachineRegIndirect(MachineReg, Offset); - I = 3; - } else if (N >= 1 && Expr.getElement(0) == dwarf::DW_OP_deref) { - // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. - ValidReg = AddMachineRegIndirect(MachineReg); - I = 1; - } else - ValidReg = AddMachineRegPiece(MachineReg); + auto N = I.getNext(); + if (N != E && N->getOp() == dwarf::DW_OP_deref) { + unsigned Offset = I->getArg(0); + ValidReg = AddMachineRegIndirect(MachineReg, Offset); + std::advance(I, 2); + break; + } else + ValidReg = AddMachineRegPiece(MachineReg); + } + case dwarf::DW_OP_deref: { + // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. 
+ ValidReg = AddMachineRegIndirect(MachineReg); + ++I; + break; + } + default: + llvm_unreachable("unsupported operand"); + } if (!ValidReg) return false; // Emit remaining elements of the expression. - AddExpression(Expr, I); + AddExpression(I, E, PieceOffsetInBits); return true; } -void DwarfExpression::AddExpression(DIExpression Expr, unsigned I, +void DwarfExpression::AddExpression(DIExpression::expr_op_iterator I, + DIExpression::expr_op_iterator E, unsigned PieceOffsetInBits) { - unsigned N = Expr.getNumElements(); - for (; I < N; ++I) { - switch (Expr.getElement(I)) { - case dwarf::DW_OP_piece: { - unsigned SizeOfByte = 8; - unsigned OffsetInBits = Expr.getElement(++I) * SizeOfByte; - unsigned SizeInBits = Expr.getElement(++I) * SizeOfByte; + for (; I != E; ++I) { + switch (I->getOp()) { + case dwarf::DW_OP_bit_piece: { + unsigned OffsetInBits = I->getArg(0); + unsigned SizeInBits = I->getArg(1); AddOpPiece(SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); break; } case dwarf::DW_OP_plus: EmitOp(dwarf::DW_OP_plus_uconst); - EmitUnsigned(Expr.getElement(++I)); + EmitUnsigned(I->getArg(0)); break; case dwarf::DW_OP_deref: EmitOp(dwarf::DW_OP_deref); break; default: - llvm_unreachable("unhandled opcode found in DIExpression"); + llvm_unreachable("unhandled opcode found in expression"); } } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 92e4d5d..78ec937 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -30,21 +30,22 @@ class DIELoc; /// entry. class DwarfExpression { protected: - const AsmPrinter &AP; // Various convenience accessors that extract things out of AsmPrinter. - const TargetRegisterInfo *getTRI() const; - unsigned getDwarfVersion() const; + const TargetRegisterInfo &TRI; + unsigned DwarfVersion; public: - DwarfExpression(const AsmPrinter &AP) : AP(AP) {} + DwarfExpression(const TargetRegisterInfo &TRI, + unsigned DwarfVersion) + : TRI(TRI), DwarfVersion(DwarfVersion) {} virtual ~DwarfExpression() {} /// Output a dwarf operand and an optional assembler comment. virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0; /// Emit a raw signed value. - virtual void EmitSigned(int Value) = 0; + virtual void EmitSigned(int64_t Value) = 0; /// Emit a raw unsigned value. - virtual void EmitUnsigned(unsigned Value) = 0; + virtual void EmitUnsigned(uint64_t Value) = 0; /// Return whether the given machine register is the frame register in the /// current function. virtual bool isFrameRegister(unsigned MachineReg) = 0; @@ -87,17 +88,19 @@ public: /// Emit an unsigned constant. void AddUnsignedConstant(unsigned Value); - /// Emit an entire DIExpression on top of a machine register location. + /// \brief Emit an entire expression on top of a machine register location. + /// /// \param PieceOffsetInBits If this is one piece out of a fragmented /// location, this is the offset of the piece inside the entire variable. /// \return false if no DWARF register exists for MachineReg. - bool AddMachineRegExpression(DIExpression Expr, unsigned MachineReg, + bool AddMachineRegExpression(const DIExpression *Expr, unsigned MachineReg, unsigned PieceOffsetInBits = 0); - /// Emit a the operations in a DIExpression, starting from element I. + /// Emit the operations remaining in the DIExpressionIterator I.
/// \param PieceOffsetInBits If this is one piece out of a fragmented /// location, this is the offset of the piece inside the entire variable. - void AddExpression(DIExpression Expr, unsigned PieceOffsetInBits = 0, - unsigned I = 0); + void AddExpression(DIExpression::expr_op_iterator I, + DIExpression::expr_op_iterator E, + unsigned PieceOffsetInBits = 0); }; /// DwarfExpression implementation for .debug_loc entries. @@ -105,27 +108,27 @@ class DebugLocDwarfExpression : public DwarfExpression { ByteStreamer &BS; public: - DebugLocDwarfExpression(const AsmPrinter &AP, ByteStreamer &BS) - : DwarfExpression(AP), BS(BS) {} + DebugLocDwarfExpression(const TargetRegisterInfo &TRI, + unsigned DwarfVersion, ByteStreamer &BS) + : DwarfExpression(TRI, DwarfVersion), BS(BS) {} void EmitOp(uint8_t Op, const char *Comment = nullptr) override; - void EmitSigned(int Value) override; - void EmitUnsigned(unsigned Value) override; + void EmitSigned(int64_t Value) override; + void EmitUnsigned(uint64_t Value) override; bool isFrameRegister(unsigned MachineReg) override; }; /// DwarfExpression implementation for singular DW_AT_location. class DIEDwarfExpression : public DwarfExpression { +const AsmPrinter &AP; DwarfUnit &DU; DIELoc &DIE; public: - DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE) - : DwarfExpression(AP), DU(DU), DIE(DIE) {} - + DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE); void EmitOp(uint8_t Op, const char *Comment = nullptr) override; - void EmitSigned(int Value) override; - void EmitUnsigned(unsigned Value) override; + void EmitSigned(int64_t Value) override; + void EmitUnsigned(uint64_t Value) override; bool isFrameRegister(unsigned MachineReg) override; }; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 549abf8..10b58d4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -17,9 +17,8 @@ #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { -DwarfFile::DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref, - BumpPtrAllocator &DA) - : Asm(AP), DD(DD), StrPool(DA, *Asm, Pref) {} +DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA) + : Asm(AP), StrPool(DA, *Asm, Pref) {} DwarfFile::~DwarfFile() {} @@ -48,15 +47,15 @@ void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) { // Emit the various dwarf units to the unit section USection with // the abbreviations going into ASection. -void DwarfFile::emitUnits(const MCSymbol *ASectionSym) { +void DwarfFile::emitUnits(bool UseOffsets) { for (const auto &TheU : CUs) { DIE &Die = TheU->getUnitDie(); - const MCSection *USection = TheU->getSection(); - Asm->OutStreamer.SwitchSection(USection); + MCSection *USection = TheU->getSection(); + Asm->OutStreamer->SwitchSection(USection); - TheU->emitHeader(ASectionSym); + TheU->emitHeader(UseOffsets); - DD.emitDIE(Die); + Asm->emitDwarfDIE(Die); } } @@ -120,37 +119,26 @@ unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) { Die.setSize(Offset - Die.getOffset()); return Offset; } -void DwarfFile::emitAbbrevs(const MCSection *Section) { + +void DwarfFile::emitAbbrevs(MCSection *Section) { // Check to see if it is worth the effort. if (!Abbreviations.empty()) { // Start the debug abbrev section. - Asm->OutStreamer.SwitchSection(Section); - - // For each abbrevation. - for (const DIEAbbrev *Abbrev : Abbreviations) { - // Emit the abbrevations code (base 1 index.) 
- Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); - - // Emit the abbreviations data. - Abbrev->Emit(Asm); - } - - // Mark end of abbreviations. - Asm->EmitULEB128(0, "EOM(3)"); + Asm->OutStreamer->SwitchSection(Section); + Asm->emitDwarfAbbrevs(Abbreviations); } } // Emit strings into a string section. -void DwarfFile::emitStrings(const MCSection *StrSection, - const MCSection *OffsetSection) { +void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection) { StrPool.emit(*Asm, StrSection, OffsetSection); } -void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { +bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS]; - DIVariable DV = Var->getVariable(); + const DILocalVariable *DV = Var->getVariable(); // Variables with positive arg numbers are parameters. - if (unsigned ArgNum = DV.getArgNumber()) { + if (unsigned ArgNum = DV->getArg()) { // Keep all parameters in order at the start of the variable list to ensure // function types are correct (no out-of-order parameters) // @@ -160,7 +148,7 @@ void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { // rather than linear search. auto I = Vars.begin(); while (I != Vars.end()) { - unsigned CurNum = (*I)->getVariable().getArgNumber(); + unsigned CurNum = (*I)->getVariable()->getArg(); // A local (non-parameter) variable has been found, insert immediately // before it. if (CurNum == 0) @@ -168,18 +156,17 @@ void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { // A later indexed parameter has been found, insert immediately before it. if (CurNum > ArgNum) break; - // FIXME: There are still some cases where two inlined functions are - // conflated together (two calls to the same function at the same - // location (eg: via a macro, or without column info, etc)) and then - // their arguments are conflated as well. - assert((LS->getParent() || CurNum != ArgNum) && - "Duplicate argument for top level (non-inlined) function"); + if (CurNum == ArgNum) { + (*I)->addMMIEntry(*Var); + return false; + } ++I; } Vars.insert(I, Var); - return; + return true; } Vars.push_back(Var); + return true; } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h index f14d673..532ed96 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -28,17 +28,15 @@ class DwarfUnit; class DIEAbbrev; class MCSymbol; class DIE; -class DISubprogram; class LexicalScope; class StringRef; class DwarfDebug; class MCSection; +class MDNode; class DwarfFile { // Target of Dwarf emission, used for sizing of abbreviations. AsmPrinter *Asm; - DwarfDebug ⅅ - // Used to uniquely define abbreviations. FoldingSet<DIEAbbrev> AbbreviationsSet; @@ -59,11 +57,10 @@ class DwarfFile { /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can /// be shared across CUs, that is why we keep the map here instead /// of in DwarfCompileUnit. - DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap; + DenseMap<const MDNode *, DIE *> DITypeNodeToDieMap; public: - DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref, - BumpPtrAllocator &DA); + DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA); ~DwarfFile(); @@ -83,19 +80,19 @@ public: /// \brief Emit all of the units to the section listed with the given /// abbreviation section. 
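The addScopeVariable rewrite in the DwarfFile.cpp hunk above replaces an assertion with a merge: parameters stay sorted by argument number at the front of the per-scope list, and a second variable carrying an already-seen argument number is folded into the existing entry via addMMIEntry, with the new bool result reporting whether an insertion actually happened. A minimal sketch of that insertion policy, using a hypothetical Var type and merge() helper rather than the real DbgVariable API:

    #include <vector>

    struct Var {
      unsigned ArgNum;           // 0 means a local, >0 a parameter index
      void merge(const Var &) {} // hypothetical stand-in for addMMIEntry
    };

    // Returns false when V carries an argument number already in the list
    // and was merged into the existing entry instead of being inserted.
    bool addScopeVar(std::vector<Var *> &Vars, Var *V) {
      if (unsigned ArgNum = V->ArgNum) {
        auto I = Vars.begin();
        for (; I != Vars.end(); ++I) {
          unsigned Cur = (*I)->ArgNum;
          if (Cur == 0 || Cur > ArgNum)
            break;               // keep parameters sorted, ahead of locals
          if (Cur == ArgNum) {
            (*I)->merge(*V);     // duplicate argument number: merge
            return false;
          }
        }
        Vars.insert(I, V);
        return true;
      }
      Vars.push_back(V);         // locals simply go at the end
      return true;
    }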
- void emitUnits(const MCSymbol *ASectionSym); + void emitUnits(bool UseOffsets); /// \brief Emit a set of abbreviations to the specific section. - void emitAbbrevs(const MCSection *); + void emitAbbrevs(MCSection *); /// \brief Emit all of the strings to the section given. - void emitStrings(const MCSection *StrSection, - const MCSection *OffsetSection = nullptr); + void emitStrings(MCSection *StrSection, MCSection *OffsetSection = nullptr); /// \brief Returns the string pool. DwarfStringPool &getStringPool() { return StrPool; } - void addScopeVariable(LexicalScope *LS, DbgVariable *Var); + /// \returns false if the variable was merged with a previous one. + bool addScopeVariable(LexicalScope *LS, DbgVariable *Var); DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> &getScopeVariables() { return ScopeVariables; @@ -106,10 +103,10 @@ public: } void insertDIE(const MDNode *TypeMD, DIE *Die) { - MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); + DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); } DIE *getDIE(const MDNode *TypeMD) { - return MDTypeNodeToDieMap.lookup(TypeMD); + return DITypeNodeToDieMap.lookup(TypeMD); } }; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index d76b66c..2066f74 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -8,63 +8,67 @@ //===----------------------------------------------------------------------===// #include "DwarfStringPool.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" using namespace llvm; -static std::pair<MCSymbol *, unsigned> & -getEntry(AsmPrinter &Asm, - StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> &Pool, - StringRef Prefix, StringRef Str) { - std::pair<MCSymbol *, unsigned> &Entry = Pool[Str]; - if (!Entry.first) { - Entry.second = Pool.size() - 1; - Entry.first = Asm.GetTempSymbol(Prefix, Entry.second); - } - return Entry; -} +DwarfStringPool::DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, + StringRef Prefix) + : Pool(A), Prefix(Prefix), + ShouldCreateSymbols(Asm.MAI->doesDwarfUseRelocationsAcrossSections()) {} -MCSymbol *DwarfStringPool::getSymbol(AsmPrinter &Asm, StringRef Str) { - return getEntry(Asm, Pool, Prefix, Str).first; -} +DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm, + StringRef Str) { + auto I = Pool.insert(std::make_pair(Str, EntryTy())); + if (I.second) { + auto &Entry = I.first->second; + Entry.Index = Pool.size() - 1; + Entry.Offset = NumBytes; + Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr; -unsigned DwarfStringPool::getIndex(AsmPrinter &Asm, StringRef Str) { - return getEntry(Asm, Pool, Prefix, Str).second; + NumBytes += Str.size() + 1; + assert(NumBytes > Entry.Offset && "Unexpected overflow"); + } + return EntryRef(*I.first); } -void DwarfStringPool::emit(AsmPrinter &Asm, const MCSection *StrSection, - const MCSection *OffsetSection) { +void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, + MCSection *OffsetSection) { if (Pool.empty()) return; // Start the dwarf str section. - Asm.OutStreamer.SwitchSection(StrSection); + Asm.OutStreamer->SwitchSection(StrSection); // Get all of the string pool entries and put them in an array by their ID so // we can sort them. 
- SmallVector<const StringMapEntry<std::pair<MCSymbol *, unsigned>> *, 64> - Entries(Pool.size()); + SmallVector<const StringMapEntry<EntryTy> *, 64> Entries(Pool.size()); for (const auto &E : Pool) - Entries[E.getValue().second] = &E; + Entries[E.getValue().Index] = &E; for (const auto &Entry : Entries) { + assert(ShouldCreateSymbols == static_cast<bool>(Entry->getValue().Symbol) && + "Mismatch between setting and entry"); + // Emit a label for reference from debug information entries. - Asm.OutStreamer.EmitLabel(Entry->getValue().first); + if (ShouldCreateSymbols) + Asm.OutStreamer->EmitLabel(Entry->getValue().Symbol); // Emit the string itself with a terminating null byte. - Asm.OutStreamer.EmitBytes( + Asm.OutStreamer->AddComment("string offset=" + + Twine(Entry->getValue().Offset)); + Asm.OutStreamer->EmitBytes( StringRef(Entry->getKeyData(), Entry->getKeyLength() + 1)); } // If we've got an offset section go ahead and emit that now as well. if (OffsetSection) { - Asm.OutStreamer.SwitchSection(OffsetSection); - unsigned offset = 0; + Asm.OutStreamer->SwitchSection(OffsetSection); unsigned size = 4; // FIXME: DWARF64 is 8. - for (const auto &Entry : Entries) { - Asm.OutStreamer.EmitIntValue(offset, size); - offset += Entry->getKeyLength() + 1; - } + for (const auto &Entry : Entries) + Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h index 63e3412..93a1684 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -11,12 +11,13 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H #include "llvm/ADT/StringMap.h" -#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/Support/Allocator.h" #include <utility> namespace llvm { +class AsmPrinter; class MCSymbol; class MCSection; class StringRef; @@ -25,25 +26,24 @@ class StringRef; // A String->Symbol mapping of strings used by indirect // references. class DwarfStringPool { - StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> Pool; + typedef DwarfStringPoolEntry EntryTy; + StringMap<EntryTy, BumpPtrAllocator &> Pool; StringRef Prefix; + unsigned NumBytes = 0; + bool ShouldCreateSymbols; public: - DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix) - : Pool(A), Prefix(Prefix) {} + typedef DwarfStringPoolEntryRef EntryRef; - void emit(AsmPrinter &Asm, const MCSection *StrSection, - const MCSection *OffsetSection = nullptr); + DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix); - /// \brief Returns an entry into the string pool with the given - /// string text. - MCSymbol *getSymbol(AsmPrinter &Asm, StringRef Str); - - /// \brief Returns the index into the string pool with the given - /// string text. - unsigned getIndex(AsmPrinter &Asm, StringRef Str); + void emit(AsmPrinter &Asm, MCSection *StrSection, + MCSection *OffsetSection = nullptr); bool empty() const { return Pool.empty(); } + + /// Get a reference to an entry in the string pool. 
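The reworked string pool above records, per entry, an insertion index, a byte offset computed once at insertion time, and an optional symbol that is only created when the target relocates across sections; the getEntry declaration just below hands back a reference to such an entry, and the offset table is then emitted by reading Entry.Offset directly instead of re-walking string lengths. A rough sketch of the offset bookkeeping under those assumptions (hypothetical ToyStringPool, no MCSymbol or allocator machinery):

    #include <cstdint>
    #include <map>
    #include <string>
    #include <utility>

    struct PoolEntry {
      unsigned Index = 0;  // insertion order, used to sort for emission
      uint64_t Offset = 0; // byte offset in the string section
    };

    class ToyStringPool {
      std::map<std::string, PoolEntry> Pool;
      uint64_t NumBytes = 0;

    public:
      const PoolEntry &getEntry(const std::string &Str) {
        auto I = Pool.insert(std::make_pair(Str, PoolEntry()));
        if (I.second) {              // first time we see this string
          auto &E = I.first->second;
          E.Index = Pool.size() - 1;
          E.Offset = NumBytes;       // fixed now, read back at emit time
          NumBytes += Str.size() + 1; // account for the NUL terminator
        }
        return I.first->second;
      }
    };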
+ EntryRef getEntry(AsmPrinter &Asm, StringRef Str); }; } #endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 455258e..04836c6 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -17,6 +17,7 @@ #include "DwarfDebug.h" #include "DwarfExpression.h" #include "llvm/ADT/APFloat.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" @@ -43,23 +44,28 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, cl::desc("Generate DWARF4 type units."), cl::init(false)); +DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, + DIELoc &DIE) + : DwarfExpression(*AP.MF->getSubtarget().getRegisterInfo(), + AP.getDwarfDebug()->getDwarfVersion()), + AP(AP), DU(DU), DIE(DIE) {} + void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) { DU.addUInt(DIE, dwarf::DW_FORM_data1, Op); } -void DIEDwarfExpression::EmitSigned(int Value) { +void DIEDwarfExpression::EmitSigned(int64_t Value) { DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value); } -void DIEDwarfExpression::EmitUnsigned(unsigned Value) { +void DIEDwarfExpression::EmitUnsigned(uint64_t Value) { DU.addUInt(DIE, dwarf::DW_FORM_udata, Value); } bool DIEDwarfExpression::isFrameRegister(unsigned MachineReg) { - return MachineReg == getTRI()->getFrameRegister(*AP.MF); + return MachineReg == TRI.getFrameRegister(*AP.MF); } - -/// Unit - Unit constructor. -DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, DICompileUnit Node, - AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) +DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, + const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, + DwarfFile *DWU) : UniqueID(UID), CUNode(Node), UnitDie(UnitTag), DebugInfoOffset(0), Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) { assert(UnitTag == dwarf::DW_TAG_compile_unit || @@ -76,7 +82,6 @@ DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A, addSectionOffset(UnitDie, dwarf::DW_AT_stmt_list, 0); } -/// ~Unit - Destructor for compile unit. DwarfUnit::~DwarfUnit() { for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) DIEBlocks[j]->~DIEBlock(); @@ -84,15 +89,11 @@ DwarfUnit::~DwarfUnit() { DIELocs[j]->~DIELoc(); } -/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug -/// information entry. DIEEntry *DwarfUnit::createDIEEntry(DIE &Entry) { DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); return Value; } -/// getDefaultLowerBound - Return the default lower bound for an array. If the -/// DWARF version doesn't handle the language, return -1. int64_t DwarfUnit::getDefaultLowerBound() const { switch (getLanguage()) { default: @@ -130,13 +131,37 @@ int64_t DwarfUnit::getDefaultLowerBound() const { if (dwarf::DWARF_VERSION >= 4) return 1; break; + + // The languages below have valid values only if the DWARF version >= 5. 
+ case dwarf::DW_LANG_OpenCL: + case dwarf::DW_LANG_Go: + case dwarf::DW_LANG_Haskell: + case dwarf::DW_LANG_C_plus_plus_03: + case dwarf::DW_LANG_C_plus_plus_11: + case dwarf::DW_LANG_OCaml: + case dwarf::DW_LANG_Rust: + case dwarf::DW_LANG_C11: + case dwarf::DW_LANG_Swift: + case dwarf::DW_LANG_Dylan: + case dwarf::DW_LANG_C_plus_plus_14: + if (dwarf::DWARF_VERSION >= 5) + return 0; + break; + + case dwarf::DW_LANG_Modula3: + case dwarf::DW_LANG_Julia: + case dwarf::DW_LANG_Fortran03: + case dwarf::DW_LANG_Fortran08: + if (dwarf::DWARF_VERSION >= 5) + return 1; + break; } return -1; } /// Check whether the DIE for this MDNode can be shared across CUs. -static bool isShareableAcrossCUs(DIDescriptor D) { +static bool isShareableAcrossCUs(const DINode *D) { // When the MDNode can be part of the type system, the DIE can be shared // across CUs. // Combining type units and cross-CU DIE sharing is lower value (since @@ -144,25 +169,18 @@ static bool isShareableAcrossCUs(DIDescriptor D) { // level already) but may be implementable for some value in projects // building multiple independent libraries with LTO and then linking those // together. - return (D.isType() || - (D.isSubprogram() && !DISubprogram(D).isDefinition())) && + return (isa<DIType>(D) || + (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) && !GenerateDwarfTypeUnits; } -/// getDIE - Returns the debug information entry map slot for the -/// specified debug variable. We delegate the request to DwarfDebug -/// when the DIE for this MDNode can be shared across CUs. The mappings -/// will be kept in DwarfDebug for shareable DIEs. -DIE *DwarfUnit::getDIE(DIDescriptor D) const { +DIE *DwarfUnit::getDIE(const DINode *D) const { if (isShareableAcrossCUs(D)) return DU->getDIE(D); return MDNodeToDieMap.lookup(D); } -/// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug -/// when the DIE for this MDNode can be shared across CUs. The mappings -/// will be kept in DwarfDebug for shareable DIEs. -void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) { +void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) { if (isShareableAcrossCUs(Desc)) { DU->insertDIE(Desc, D); return; @@ -170,7 +188,6 @@ void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) { MDNodeToDieMap.insert(std::make_pair(Desc, D)); } -/// addFlag - Add a flag that is true. void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) { if (DD->getDwarfVersion() >= 4) Die.addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne); @@ -178,8 +195,6 @@ void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) { Die.addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne); } -/// addUInt - Add an unsigned integer attribute data and value. -/// void DwarfUnit::addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, uint64_t Integer) { if (!Form) @@ -193,8 +208,6 @@ void DwarfUnit::addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer) { addUInt(Block, (dwarf::Attribute)0, Form, Integer); } -/// addSInt - Add an signed integer attribute data and value. -/// void DwarfUnit::addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, int64_t Integer) { if (!Form) @@ -208,43 +221,14 @@ void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form, addSInt(Die, (dwarf::Attribute)0, Form, Integer); } -/// addString - Add a string attribute data and value. We always emit a -/// reference to the string pool instead of immediate strings so that DIEs have -/// more predictable sizes. 
-/// In the case of split dwarf we emit an index
-/// into another table which gets us the static offset into the string
-/// table.
 void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
 StringRef String) {
- if (!isDwoUnit())
- return addLocalString(Die, Attribute, String);
-
- addIndexedString(Die, Attribute, String);
-}
-
-void DwarfUnit::addIndexedString(DIE &Die, dwarf::Attribute Attribute,
- StringRef String) {
- unsigned idx = DU->getStringPool().getIndex(*Asm, String);
- DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
- DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
- Die.addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Str);
-}
-
-/// addLocalString - Add a string attribute data and value. This is guaranteed
-/// to be in the local string pool instead of indirected.
-void DwarfUnit::addLocalString(DIE &Die, dwarf::Attribute Attribute,
- StringRef String) {
- MCSymbol *Symb = DU->getStringPool().getSymbol(*Asm, String);
- DIEValue *Value;
- if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- Value = new (DIEValueAllocator) DIELabel(Symb);
- else
- Value = new (DIEValueAllocator) DIEDelta(Symb, DD->getDebugStrSym());
- DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
- Die.addValue(Attribute, dwarf::DW_FORM_strp, Str);
+ Die.addValue(Attribute,
+ isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp,
+ new (DIEValueAllocator)
+ DIEString(DU->getStringPool().getEntry(*Asm, String)));
}
-/// addLabel - Add a Dwarf label attribute data and value.
-///
void DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
 const MCSymbol *Label) {
 DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
@@ -255,8 +239,6 @@ void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
 addLabel(Die, (dwarf::Attribute)0, Form, Label);
}
-/// addSectionOffset - Add an offset into a section attribute data and value.
-///
void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
 uint64_t Integer) {
 if (DD->getDwarfVersion() >= 4)
@@ -270,9 +252,6 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirNam
 : getCU().getOrCreateSourceID(FileName, DirName);
}
-/// addOpAddress - Add a dwarf op address data and value using the
-/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
-///
void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
 if (!DD->useSplitDwarf()) {
 addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
@@ -290,8 +269,6 @@ void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
 Die.addValue(Attribute, dwarf::DW_FORM_data4, Value);
}
-/// addDIEEntry - Add a DIE attribute data and value.
-///
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) {
 addDIEEntry(Die, Attribute, createDIEEntry(Entry));
}
@@ -321,9 +298,7 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
 Entry);
}
-/// Create a DIE with the given Tag, add the DIE to its parent, and
-/// call insertDIE if MD is not null.
-DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) {
+DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) {
 assert(Tag != dwarf::DW_TAG_auto_variable &&
 Tag != dwarf::DW_TAG_arg_variable);
 Parent.addChild(make_unique<DIE>((dwarf::Tag)Tag));
@@ -333,8 +308,6 @@ DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) {
 return Die;
}
-/// addBlock - Add block data.
-/// void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) { Loc->ComputeSize(Asm); DIELocs.push_back(Loc); // Memoize so we can call the destructor later on. @@ -348,8 +321,6 @@ void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, Die.addValue(Attribute, Block->BestForm(), Block); } -/// addSourceLine - Add location information to specified debug information -/// entry. void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File, StringRef Directory) { if (Line == 0) @@ -361,58 +332,41 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File, addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfUnit::addSourceLine(DIE &Die, DIVariable V) { - assert(V.isVariable()); +void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) { + assert(V); - addSourceLine(Die, V.getLineNumber(), V.getContext().getFilename(), - V.getContext().getDirectory()); + addSourceLine(Die, V->getLine(), V->getScope()->getFilename(), + V->getScope()->getDirectory()); } -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfUnit::addSourceLine(DIE &Die, DIGlobalVariable G) { - assert(G.isGlobalVariable()); +void DwarfUnit::addSourceLine(DIE &Die, const DIGlobalVariable *G) { + assert(G); - addSourceLine(Die, G.getLineNumber(), G.getFilename(), G.getDirectory()); + addSourceLine(Die, G->getLine(), G->getFilename(), G->getDirectory()); } -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfUnit::addSourceLine(DIE &Die, DISubprogram SP) { - assert(SP.isSubprogram()); +void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) { + assert(SP); - addSourceLine(Die, SP.getLineNumber(), SP.getFilename(), SP.getDirectory()); + addSourceLine(Die, SP->getLine(), SP->getFilename(), SP->getDirectory()); } -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfUnit::addSourceLine(DIE &Die, DIType Ty) { - assert(Ty.isType()); +void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) { + assert(Ty); - addSourceLine(Die, Ty.getLineNumber(), Ty.getFilename(), Ty.getDirectory()); + addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); } -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfUnit::addSourceLine(DIE &Die, DIObjCProperty Ty) { - assert(Ty.isObjCProperty()); +void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) { + assert(Ty); - DIFile File = Ty.getFile(); - addSourceLine(Die, Ty.getLineNumber(), File.getFilename(), - File.getDirectory()); + addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); } -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfUnit::addSourceLine(DIE &Die, DINameSpace NS) { - assert(NS.Verify()); - - addSourceLine(Die, NS.getLineNumber(), NS.getFilename(), NS.getDirectory()); +void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) { + addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory()); } -/// addRegisterOp - Add register operand. bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, unsigned SizeInBits, unsigned OffsetInBits) { DIEDwarfExpression Expr(*Asm, *this, TheDie); @@ -420,7 +374,6 @@ bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, return true; } -/// addRegisterOffset - Add register offset. 
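EmitSigned and EmitUnsigned were widened to int64_t/uint64_t in the hunks above because DWARF encodes expression operands as variable-length LEB128 values, where a plain int would truncate large offsets; addRegisterOffset, whose definition follows, is a typical client that encodes a signed register offset. For reference, the two encodings as a standalone sketch (not the LLVM helpers):

    #include <cstdint>
    #include <vector>

    // Append a ULEB128-encoded value, the form used for operands such as
    // DW_OP_plus_uconst.
    static void appendULEB128(std::vector<uint8_t> &Out, uint64_t V) {
      do {
        uint8_t Byte = V & 0x7f;
        V >>= 7;
        if (V)
          Byte |= 0x80;          // high bit set: more bytes follow
        Out.push_back(Byte);
      } while (V);
    }

    // Append an SLEB128-encoded value, the form used for DW_OP_breg*
    // and DW_OP_fbreg offsets. Assumes arithmetic right shift.
    static void appendSLEB128(std::vector<uint8_t> &Out, int64_t V) {
      bool More;
      do {
        uint8_t Byte = V & 0x7f;
        V >>= 7;                 // keeps the sign on two's-complement targets
        More = !((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)));
        if (More)
          Byte |= 0x80;
        Out.push_back(Byte);
      } while (More);
    }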
bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset) { DIEDwarfExpression Expr(*Asm, *this, TheDie); @@ -481,39 +434,31 @@ bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, That is what this function does. */ -/// addBlockByrefAddress - Start with the address based on the location -/// provided, and generate the DWARF information necessary to find the -/// actual Block variable (navigating the Block struct) based on the -/// starting location. Add the DWARF information to the die. For -/// more information, read large comment just above here. -/// void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location) { - DIType Ty = DV.getType(); - DIType TmpTy = Ty; - uint16_t Tag = Ty.getTag(); + const DIType *Ty = DV.getType(); + const DIType *TmpTy = Ty; + uint16_t Tag = Ty->getTag(); bool isPointer = false; StringRef varName = DV.getName(); if (Tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy(Ty); - TmpTy = resolve(DTy.getTypeDerivedFrom()); + auto *DTy = cast<DIDerivedType>(Ty); + TmpTy = resolve(DTy->getBaseType()); isPointer = true; } - DICompositeType blockStruct(TmpTy); - // Find the __forwarding field and the variable field in the __Block_byref // struct. - DIArray Fields = blockStruct.getElements(); - DIDerivedType varField; - DIDerivedType forwardingField; + DINodeArray Fields = cast<DICompositeTypeBase>(TmpTy)->getElements(); + const DIDerivedType *varField = nullptr; + const DIDerivedType *forwardingField = nullptr; - for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { - DIDerivedType DT(Fields.getElement(i)); - StringRef fieldName = DT.getName(); + for (unsigned i = 0, N = Fields.size(); i < N; ++i) { + auto *DT = cast<DIDerivedType>(Fields[i]); + StringRef fieldName = DT->getName(); if (fieldName == "__forwarding") forwardingField = DT; else if (fieldName == varName) @@ -521,8 +466,8 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, } // Get the offsets for the forwarding field and the variable field. - unsigned forwardingFieldOffset = forwardingField.getOffsetInBits() >> 3; - unsigned varFieldOffset = varField.getOffsetInBits() >> 2; + unsigned forwardingFieldOffset = forwardingField->getOffsetInBits() >> 3; + unsigned varFieldOffset = varField->getOffsetInBits() >> 2; // Decode the original location, and use that as the start of the byref // variable's location. @@ -567,10 +512,9 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, } /// Return true if type encoding is unsigned. -static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { - DIDerivedType DTy(Ty); - if (DTy.isDerivedType()) { - dwarf::Tag T = (dwarf::Tag)Ty.getTag(); +static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) { + if (auto *DTy = dyn_cast<DIDerivedTypeBase>(Ty)) { + dwarf::Tag T = (dwarf::Tag)Ty->getTag(); // Encode pointer constants as unsigned bytes. This is used at least for // null pointer constant emission. 
// (Pieces of) aggregate types that get hacked apart by SROA may also be @@ -584,64 +528,64 @@ static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { T == dwarf::DW_TAG_ptr_to_member_type || T == dwarf::DW_TAG_reference_type || T == dwarf::DW_TAG_rvalue_reference_type || - T == dwarf::DW_TAG_structure_type) + T == dwarf::DW_TAG_structure_type || + T == dwarf::DW_TAG_union_type) return true; assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || T == dwarf::DW_TAG_volatile_type || T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_enumeration_type); - if (DITypeRef Deriv = DTy.getTypeDerivedFrom()) + if (DITypeRef Deriv = DTy->getBaseType()) return isUnsignedDIType(DD, DD->resolve(Deriv)); // FIXME: Enums without a fixed underlying type have unknown signedness // here, leading to incorrectly emitted constants. - assert(DTy.getTag() == dwarf::DW_TAG_enumeration_type); + assert(DTy->getTag() == dwarf::DW_TAG_enumeration_type); return false; } - DIBasicType BTy(Ty); - assert(BTy.isBasicType()); - unsigned Encoding = BTy.getEncoding(); + auto *BTy = cast<DIBasicType>(Ty); + unsigned Encoding = BTy->getEncoding(); assert((Encoding == dwarf::DW_ATE_unsigned || Encoding == dwarf::DW_ATE_unsigned_char || Encoding == dwarf::DW_ATE_signed || Encoding == dwarf::DW_ATE_signed_char || - Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || - (Ty.getTag() == dwarf::DW_TAG_unspecified_type && - Ty.getName() == "decltype(nullptr)")) && + Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF || + Encoding == dwarf::DW_ATE_boolean || + (Ty->getTag() == dwarf::DW_TAG_unspecified_type && + Ty->getName() == "decltype(nullptr)")) && "Unsupported encoding"); - return (Encoding == dwarf::DW_ATE_unsigned || - Encoding == dwarf::DW_ATE_unsigned_char || - Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || - Ty.getTag() == dwarf::DW_TAG_unspecified_type); + return Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || + Ty->getTag() == dwarf::DW_TAG_unspecified_type; } /// If this type is derived from a base type then return base type size. -static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { - unsigned Tag = Ty.getTag(); +static uint64_t getBaseTypeSize(DwarfDebug *DD, const DIDerivedType *Ty) { + unsigned Tag = Ty->getTag(); if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_restrict_type) - return Ty.getSizeInBits(); + return Ty->getSizeInBits(); - DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom()); + auto *BaseType = DD->resolve(Ty->getBaseType()); - assert(BaseType.isValid()); + assert(BaseType && "Unexpected invalid base type"); // If this is a derived type, go ahead and get the base type, unless it's a // reference then it's just the size of the field. Pointer types have no need // of this since they're a different type of qualification on the type. 
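The getBaseTypeSize walk that continues below only looks through tags that rename or qualify a type; any other node already knows its own size, and a reference type stops the recursion because a member of reference type stores the reference's size, not the referee's. The shape of that recursion on a deliberately simplified node (the DW_TAG constants are real, the ToyType struct is not):

    #include "llvm/Support/Dwarf.h"
    #include <cstdint>

    using namespace llvm;

    // Deliberately simplified stand-in for the metadata node.
    struct ToyType {
      unsigned Tag;        // one of the dwarf::DW_TAG_* values
      uint64_t SizeInBits;
      const ToyType *Base; // underlying type, or nullptr
    };

    uint64_t baseTypeSize(const ToyType &Ty) {
      switch (Ty.Tag) {
      case dwarf::DW_TAG_member:
      case dwarf::DW_TAG_typedef:
      case dwarf::DW_TAG_const_type:
      case dwarf::DW_TAG_volatile_type:
      case dwarf::DW_TAG_restrict_type:
        break;                  // qualifiers and renames: look through
      default:
        return Ty.SizeInBits;   // everything else is authoritative
      }
      const ToyType *Base = Ty.Base;
      // The real code asserts Base is non-null; handled defensively here.
      if (!Base || Base->Tag == dwarf::DW_TAG_reference_type ||
          Base->Tag == dwarf::DW_TAG_rvalue_reference_type)
        return Ty.SizeInBits;   // references keep the field's own size
      return baseTypeSize(*Base);
    }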
- if (BaseType.getTag() == dwarf::DW_TAG_reference_type || - BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type) - return Ty.getSizeInBits(); + if (BaseType->getTag() == dwarf::DW_TAG_reference_type || + BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type) + return Ty->getSizeInBits(); - if (BaseType.isDerivedType()) - return getBaseTypeSize(DD, DIDerivedType(BaseType)); + if (auto *DT = dyn_cast<DIDerivedType>(BaseType)) + return getBaseTypeSize(DD, DT); - return BaseType.getSizeInBits(); + return BaseType->getSizeInBits(); } -/// addConstantFPValue - Add constant value entry in variable DIE. void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) { assert(MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); @@ -664,20 +608,18 @@ void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) { addBlock(Die, dwarf::DW_AT_const_value, Block); } -/// addConstantFPValue - Add constant value entry in variable DIE. void DwarfUnit::addConstantFPValue(DIE &Die, const ConstantFP *CFP) { // Pass this down to addConstantValue as an unsigned bag of bits. addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true); } -/// addConstantValue - Add constant value entry in variable DIE. -void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty) { +void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI, + const DIType *Ty) { addConstantValue(Die, CI->getValue(), Ty); } -/// addConstantValue - Add constant value entry in variable DIE. void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO, - DIType Ty) { + const DIType *Ty) { assert(MO.isImm() && "Invalid machine operand!"); addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm()); @@ -690,11 +632,10 @@ void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) { Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata, Val); } -void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, DIType Ty) { +void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty) { addConstantValue(Die, Val, isUnsignedDIType(DD, Ty)); } -// addConstantValue - Add constant value entry in variable DIE. void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) { unsigned CIBitWidth = Val.getBitWidth(); if (CIBitWidth <= 64) { @@ -724,67 +665,67 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) { addBlock(Die, dwarf::DW_AT_const_value, Block); } -/// addTemplateParams - Add template parameters into buffer. -void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { +void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) { + if (!LinkageName.empty()) + addString(Die, + DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name + : dwarf::DW_AT_MIPS_linkage_name, + GlobalValue::getRealLinkageName(LinkageName)); +} + +void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) { // Add template parameters. 
- for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { - DIDescriptor Element = TParams.getElement(i); - if (Element.isTemplateTypeParameter()) - constructTemplateTypeParameterDIE(Buffer, - DITemplateTypeParameter(Element)); - else if (Element.isTemplateValueParameter()) - constructTemplateValueParameterDIE(Buffer, - DITemplateValueParameter(Element)); + for (const auto *Element : TParams) { + if (auto *TTP = dyn_cast<DITemplateTypeParameter>(Element)) + constructTemplateTypeParameterDIE(Buffer, TTP); + else if (auto *TVP = dyn_cast<DITemplateValueParameter>(Element)) + constructTemplateValueParameterDIE(Buffer, TVP); } } -/// getOrCreateContextDIE - Get context owner's DIE. -DIE *DwarfUnit::getOrCreateContextDIE(DIScope Context) { - if (!Context || Context.isFile()) +DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) { + if (!Context || isa<DIFile>(Context)) return &getUnitDie(); - if (Context.isType()) - return getOrCreateTypeDIE(DIType(Context)); - if (Context.isNameSpace()) - return getOrCreateNameSpace(DINameSpace(Context)); - if (Context.isSubprogram()) - return getOrCreateSubprogramDIE(DISubprogram(Context)); + if (auto *T = dyn_cast<DIType>(Context)) + return getOrCreateTypeDIE(T); + if (auto *NS = dyn_cast<DINamespace>(Context)) + return getOrCreateNameSpace(NS); + if (auto *SP = dyn_cast<DISubprogram>(Context)) + return getOrCreateSubprogramDIE(SP); return getDIE(Context); } -DIE *DwarfUnit::createTypeDIE(DICompositeType Ty) { - DIScope Context = resolve(Ty.getContext()); +DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) { + auto *Context = resolve(Ty->getScope()); DIE *ContextDIE = getOrCreateContextDIE(Context); if (DIE *TyDIE = getDIE(Ty)) return TyDIE; // Create new type. - DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty); - constructTypeDIE(TyDIE, Ty); + constructTypeDIE(TyDIE, cast<DICompositeType>(Ty)); updateAcceleratorTables(Context, Ty, TyDIE); return &TyDIE; } -/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the -/// given DIType. DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { if (!TyNode) return nullptr; - DIType Ty(TyNode); - assert(Ty.isType()); - assert(Ty == resolve(Ty.getRef()) && + auto *Ty = cast<DIType>(TyNode); + assert(Ty == resolve(Ty->getRef()) && "type was not uniqued, possible ODR violation."); // DW_TAG_restrict_type is not supported in DWARF2 - if (Ty.getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) - return getOrCreateTypeDIE(resolve(DIDerivedType(Ty).getTypeDerivedFrom())); + if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) + return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType())); // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. - DIScope Context = resolve(Ty.getContext()); + auto *Context = resolve(Ty->getScope()); DIE *ContextDIE = getOrCreateContextDIE(Context); assert(ContextDIE); @@ -792,50 +733,49 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { return TyDIE; // Create new type. 
- DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty); updateAcceleratorTables(Context, Ty, TyDIE); - if (Ty.isBasicType()) - constructTypeDIE(TyDIE, DIBasicType(Ty)); - else if (Ty.isCompositeType()) { - DICompositeType CTy(Ty); - if (GenerateDwarfTypeUnits && !Ty.isForwardDecl()) - if (MDString *TypeId = CTy.getIdentifier()) { + if (auto *BT = dyn_cast<DIBasicType>(Ty)) + constructTypeDIE(TyDIE, BT); + else if (auto *STy = dyn_cast<DISubroutineType>(Ty)) + constructTypeDIE(TyDIE, STy); + else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { + if (GenerateDwarfTypeUnits && !Ty->isForwardDecl()) + if (MDString *TypeId = CTy->getRawIdentifier()) { DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); // Skip updating the accelerator tables since this is not the full type. return &TyDIE; } constructTypeDIE(TyDIE, CTy); } else { - assert(Ty.isDerivedType() && "Unknown kind of DIType"); - constructTypeDIE(TyDIE, DIDerivedType(Ty)); + constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty)); } return &TyDIE; } -void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty, - const DIE &TyDIE) { - if (!Ty.getName().empty() && !Ty.isForwardDecl()) { +void DwarfUnit::updateAcceleratorTables(const DIScope *Context, + const DIType *Ty, const DIE &TyDIE) { + if (!Ty->getName().empty() && !Ty->isForwardDecl()) { bool IsImplementation = 0; - if (Ty.isCompositeType()) { - DICompositeType CT(Ty); + if (auto *CT = dyn_cast<DICompositeTypeBase>(Ty)) { // A runtime language of 0 actually means C/C++ and that any // non-negative value is some version of Objective-C/C++. - IsImplementation = (CT.getRunTimeLang() == 0) || CT.isObjcClassComplete(); + IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete(); } unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0; - DD->addAccelType(Ty.getName(), TyDIE, Flags); + DD->addAccelType(Ty->getName(), TyDIE, Flags); - if (!Context || Context.isCompileUnit() || Context.isFile() || - Context.isNameSpace()) + if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) || + isa<DINamespace>(Context)) addGlobalType(Ty, TyDIE, Context); } } -/// addType - Add a new type attribute to the specified entity. -void DwarfUnit::addType(DIE &Entity, DIType Ty, dwarf::Attribute Attribute) { +void DwarfUnit::addType(DIE &Entity, const DIType *Ty, + dwarf::Attribute Attribute) { assert(Ty && "Trying to add a type that doesn't exist?"); // Check for pre-existence. @@ -855,12 +795,7 @@ void DwarfUnit::addType(DIE &Entity, DIType Ty, dwarf::Attribute Attribute) { addDIEEntry(Entity, Attribute, Entry); } -/// getParentContextString - Walks the metadata parent chain in a language -/// specific manner (using the compile unit language) and returns -/// it as a string. This is done at the metadata level because DIEs may -/// not currently have been added to the parent context and walking the -/// DIEs looking for names is more expensive than walking the metadata. 
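One detail that repeats through these getOrCreate* functions: the context DIE is always constructed before the memoization map is consulted, because building the context can recursively create and register the very DIE being requested. Compressed to its essentials, with hypothetical Node and DIE scaffolding:

    #include <map>

    struct Node { const Node *Scope = nullptr; };
    struct DIE { DIE *Parent = nullptr; };

    std::map<const Node *, DIE *> Memo;

    DIE *getOrCreateDIE(const Node *N) {
      // Build the context first; in the real emitter, constructing a
      // parent scope (e.g. a class) can create and memoize DIEs for its
      // members, including N itself.
      DIE *Context = N->Scope ? getOrCreateDIE(N->Scope) : nullptr;
      // Only after the context exists is the lookup safe.
      auto It = Memo.find(N);
      if (It != Memo.end())
        return It->second;
      DIE *D = new DIE();
      D->Parent = Context;
      Memo[N] = D;
      return D;
    }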
-std::string DwarfUnit::getParentContextString(DIScope Context) const { +std::string DwarfUnit::getParentContextString(const DIScope *Context) const { if (!Context) return ""; @@ -869,11 +804,11 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const { return ""; std::string CS; - SmallVector<DIScope, 1> Parents; - while (!Context.isCompileUnit()) { + SmallVector<const DIScope *, 1> Parents; + while (!isa<DICompileUnit>(Context)) { Parents.push_back(Context); - if (Context.getContext()) - Context = resolve(Context.getContext()); + if (Context->getScope()) + Context = resolve(Context->getScope()); else // Structure, etc types will have a NULL context if they're at the top // level. @@ -882,12 +817,10 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const { // Reverse iterate over our list to go from the outermost construct to the // innermost. - for (SmallVectorImpl<DIScope>::reverse_iterator I = Parents.rbegin(), - E = Parents.rend(); - I != E; ++I) { - DIScope Ctx = *I; - StringRef Name = Ctx.getName(); - if (Name.empty() && Ctx.isNameSpace()) + for (auto I = Parents.rbegin(), E = Parents.rend(); I != E; ++I) { + const DIScope *Ctx = *I; + StringRef Name = Ctx->getName(); + if (Name.empty() && isa<DINamespace>(Ctx)) Name = "(anonymous namespace)"; if (!Name.empty()) { CS += Name; @@ -897,34 +830,32 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const { return CS; } -/// constructTypeDIE - Construct basic type die from DIBasicType. -void DwarfUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) { // Get core information. - StringRef Name = BTy.getName(); + StringRef Name = BTy->getName(); // Add name if not anonymous or intermediate type. if (!Name.empty()) addString(Buffer, dwarf::DW_AT_name, Name); // An unspecified type only has a name attribute. - if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) + if (BTy->getTag() == dwarf::DW_TAG_unspecified_type) return; addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - BTy.getEncoding()); + BTy->getEncoding()); - uint64_t Size = BTy.getSizeInBits() >> 3; + uint64_t Size = BTy->getSizeInBits() >> 3; addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); } -/// constructTypeDIE - Construct derived type die from DIDerivedType. -void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { // Get core information. - StringRef Name = DTy.getName(); - uint64_t Size = DTy.getSizeInBits() >> 3; + StringRef Name = DTy->getName(); + uint64_t Size = DTy->getSizeInBits() >> 3; uint16_t Tag = Buffer.getTag(); // Map to main type, void will not have a type. - DIType FromTy = resolve(DTy.getTypeDerivedFrom()); + const DIType *FromTy = resolve(DTy->getBaseType()); if (FromTy) addType(Buffer, FromTy); @@ -938,35 +869,62 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) - addDIEEntry(Buffer, dwarf::DW_AT_containing_type, - *getOrCreateTypeDIE(resolve(DTy.getClassType()))); + addDIEEntry( + Buffer, dwarf::DW_AT_containing_type, + *getOrCreateTypeDIE(resolve(cast<DIDerivedType>(DTy)->getClassType()))); // Add source line info if available and TyDesc is not a forward declaration. - if (!DTy.isForwardDecl()) + if (!DTy->isForwardDecl()) addSourceLine(Buffer, DTy); } -/// constructSubprogramArguments - Construct function argument DIEs. 
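getParentContextString, whose old signature is removed just below, gathers scope names innermost-first while climbing toward the compile unit and then iterates the list in reverse so the rendered string reads outermost-first, substituting a placeholder for anonymous namespaces. The same two-phase walk on a toy scope chain (hypothetical ToyScope, not the DIScope API):

    #include <string>
    #include <vector>

    struct ToyScope {
      std::string Name;       // may be empty
      const ToyScope *Parent; // nullptr at the top level
      bool IsAnonymousNamespace;
    };

    std::string scopePrefix(const ToyScope *Ctx) {
      // Phase 1: climb outward, collecting scopes innermost-first.
      // (The real code stops at the compile unit rather than at null.)
      std::vector<const ToyScope *> Parents;
      for (; Ctx; Ctx = Ctx->Parent)
        Parents.push_back(Ctx);
      // Phase 2: walk backwards so the outermost scope prints first.
      std::string CS;
      for (auto I = Parents.rbegin(), E = Parents.rend(); I != E; ++I) {
        std::string Name = (*I)->Name;
        if (Name.empty() && (*I)->IsAnonymousNamespace)
          Name = "(anonymous namespace)";
        if (!Name.empty()) {
          CS += Name;
          CS += "::";
        }
      }
      return CS;
    }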
-void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeArray Args) { - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIType Ty = resolve(Args.getElement(i)); +void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) { + for (unsigned i = 1, N = Args.size(); i < N; ++i) { + const DIType *Ty = resolve(Args[i]); if (!Ty) { assert(i == N-1 && "Unspecified parameter must be the last argument"); createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); } else { DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); addType(Arg, Ty); - if (Ty.isArtificial()) + if (Ty->isArtificial()) addFlag(Arg, dwarf::DW_AT_artificial); } } } -/// constructTypeDIE - Construct type DIE from DICompositeType. -void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) { + // Add return type. A void return won't have a type. + auto Elements = cast<DISubroutineType>(CTy)->getTypeArray(); + if (Elements.size()) + if (auto RTy = resolve(Elements[0])) + addType(Buffer, RTy); + + bool isPrototyped = true; + if (Elements.size() == 2 && !Elements[1]) + isPrototyped = false; + + constructSubprogramArguments(Buffer, Elements); + + // Add prototype flag if we're dealing with a C language and the function has + // been prototyped. + uint16_t Language = getLanguage(); + if (isPrototyped && + (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || + Language == dwarf::DW_LANG_ObjC)) + addFlag(Buffer, dwarf::DW_AT_prototyped); + + if (CTy->isLValueReference()) + addFlag(Buffer, dwarf::DW_AT_reference); + + if (CTy->isRValueReference()) + addFlag(Buffer, dwarf::DW_AT_rvalue_reference); +} + +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { // Add name if not anonymous or intermediate type. - StringRef Name = CTy.getName(); + StringRef Name = CTy->getName(); - uint64_t Size = CTy.getSizeInBits() >> 3; + uint64_t Size = CTy->getSizeInBits() >> 3; uint16_t Tag = Buffer.getTag(); switch (Tag) { @@ -976,82 +934,39 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { case dwarf::DW_TAG_enumeration_type: constructEnumTypeDIE(Buffer, CTy); break; - case dwarf::DW_TAG_subroutine_type: { - // Add return type. A void return won't have a type. - DITypeArray Elements = DISubroutineType(CTy).getTypeArray(); - DIType RTy(resolve(Elements.getElement(0))); - if (RTy) - addType(Buffer, RTy); - - bool isPrototyped = true; - if (Elements.getNumElements() == 2 && - !Elements.getElement(1)) - isPrototyped = false; - - constructSubprogramArguments(Buffer, Elements); - - // Add prototype flag if we're dealing with a C language and the - // function has been prototyped. - uint16_t Language = getLanguage(); - if (isPrototyped && - (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || - Language == dwarf::DW_LANG_ObjC)) - addFlag(Buffer, dwarf::DW_AT_prototyped); - - if (CTy.isLValueReference()) - addFlag(Buffer, dwarf::DW_AT_reference); - - if (CTy.isRValueReference()) - addFlag(Buffer, dwarf::DW_AT_rvalue_reference); - } break; case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_class_type: { // Add elements to structure type. 
- DIArray Elements = CTy.getElements(); - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.isSubprogram()) - getOrCreateSubprogramDIE(DISubprogram(Element)); - else if (Element.isDerivedType()) { - DIDerivedType DDTy(Element); - if (DDTy.getTag() == dwarf::DW_TAG_friend) { + DINodeArray Elements = CTy->getElements(); + for (const auto *Element : Elements) { + if (!Element) + continue; + if (auto *SP = dyn_cast<DISubprogram>(Element)) + getOrCreateSubprogramDIE(SP); + else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) { + if (DDTy->getTag() == dwarf::DW_TAG_friend) { DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); - addType(ElemDie, resolve(DDTy.getTypeDerivedFrom()), - dwarf::DW_AT_friend); - } else if (DDTy.isStaticMember()) { + addType(ElemDie, resolve(DDTy->getBaseType()), dwarf::DW_AT_friend); + } else if (DDTy->isStaticMember()) { getOrCreateStaticMemberDIE(DDTy); } else { constructMemberDIE(Buffer, DDTy); } - } else if (Element.isObjCProperty()) { - DIObjCProperty Property(Element); - DIE &ElemDie = createAndAddDIE(Property.getTag(), Buffer); - StringRef PropertyName = Property.getObjCPropertyName(); + } else if (auto *Property = dyn_cast<DIObjCProperty>(Element)) { + DIE &ElemDie = createAndAddDIE(Property->getTag(), Buffer); + StringRef PropertyName = Property->getName(); addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); - if (Property.getType()) - addType(ElemDie, Property.getType()); + if (Property->getType()) + addType(ElemDie, Property->getType()); addSourceLine(ElemDie, Property); - StringRef GetterName = Property.getObjCPropertyGetterName(); + StringRef GetterName = Property->getGetterName(); if (!GetterName.empty()) addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName); - StringRef SetterName = Property.getObjCPropertySetterName(); + StringRef SetterName = Property->getSetterName(); if (!SetterName.empty()) addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName); - unsigned PropertyAttributes = 0; - if (Property.isReadOnlyObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; - if (Property.isReadWriteObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite; - if (Property.isAssignObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign; - if (Property.isRetainObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain; - if (Property.isCopyObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy; - if (Property.isNonAtomicObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; - if (PropertyAttributes) + if (unsigned PropertyAttributes = Property->getAttributes()) addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None, PropertyAttributes); @@ -1060,28 +975,27 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { Entry = createDIEEntry(ElemDie); insertDIEEntry(Element, Entry); } - } else - continue; + } } - if (CTy.isAppleBlockExtension()) + if (CTy->isAppleBlockExtension()) addFlag(Buffer, dwarf::DW_AT_APPLE_block); // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type // inside C++ composite types to point to the base class with the vtable. 
- DICompositeType ContainingType(resolve(CTy.getContainingType())); - if (ContainingType) + if (auto *ContainingType = + dyn_cast_or_null<DICompositeType>(resolve(CTy->getVTableHolder()))) addDIEEntry(Buffer, dwarf::DW_AT_containing_type, *getOrCreateTypeDIE(ContainingType)); - if (CTy.isObjcClassComplete()) + if (CTy->isObjcClassComplete()) addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type); // Add template parameters to a class, structure or union types. // FIXME: The support isn't in the metadata for this yet. if (Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - addTemplateParams(Buffer, CTy.getTemplateParams()); + addTemplateParams(Buffer, CTy->getTemplateParams()); break; } @@ -1100,55 +1014,50 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // TODO: Do we care about size for enum forward declarations? if (Size) addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); - else if (!CTy.isForwardDecl()) + else if (!CTy->isForwardDecl()) // Add zero size if it is not a forward declaration. addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0); // If we're a forward decl, say so. - if (CTy.isForwardDecl()) + if (CTy->isForwardDecl()) addFlag(Buffer, dwarf::DW_AT_declaration); // Add source line info if available. - if (!CTy.isForwardDecl()) + if (!CTy->isForwardDecl()) addSourceLine(Buffer, CTy); // No harm in adding the runtime language to the declaration. - unsigned RLang = CTy.getRunTimeLang(); + unsigned RLang = CTy->getRuntimeLang(); if (RLang) addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, RLang); } } -/// constructTemplateTypeParameterDIE - Construct new DIE for the given -/// DITemplateTypeParameter. -void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer, - DITemplateTypeParameter TP) { +void DwarfUnit::constructTemplateTypeParameterDIE( + DIE &Buffer, const DITemplateTypeParameter *TP) { DIE &ParamDIE = createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer); // Add the type if it exists, it could be void and therefore no type. - if (TP.getType()) - addType(ParamDIE, resolve(TP.getType())); - if (!TP.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); + if (TP->getType()) + addType(ParamDIE, resolve(TP->getType())); + if (!TP->getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, TP->getName()); } -/// constructTemplateValueParameterDIE - Construct new DIE for the given -/// DITemplateValueParameter. -void -DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer, - DITemplateValueParameter VP) { - DIE &ParamDIE = createAndAddDIE(VP.getTag(), Buffer); +void DwarfUnit::constructTemplateValueParameterDIE( + DIE &Buffer, const DITemplateValueParameter *VP) { + DIE &ParamDIE = createAndAddDIE(VP->getTag(), Buffer); // Add the type if there is one, template template and template parameter // packs will not have a type. 
- if (VP.getTag() == dwarf::DW_TAG_template_value_parameter) - addType(ParamDIE, resolve(VP.getType())); - if (!VP.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, VP.getName()); - if (Metadata *Val = VP.getValue()) { + if (VP->getTag() == dwarf::DW_TAG_template_value_parameter) + addType(ParamDIE, resolve(VP->getType())); + if (!VP->getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, VP->getName()); + if (Metadata *Val = VP->getValue()) { if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val)) - addConstantValue(ParamDIE, CI, resolve(VP.getType())); + addConstantValue(ParamDIE, CI, resolve(VP->getType())); else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) { // For declaration non-type template parameters (such as global values and // functions) @@ -1158,51 +1067,47 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer, // parameter, rather than a pointer to it. addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); addBlock(ParamDIE, dwarf::DW_AT_location, Loc); - } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) { + } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_template_param) { assert(isa<MDString>(Val)); addString(ParamDIE, dwarf::DW_AT_GNU_template_name, cast<MDString>(Val)->getString()); - } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { - assert(isa<MDNode>(Val)); - DIArray A(cast<MDNode>(Val)); - addTemplateParams(ParamDIE, A); + } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { + addTemplateParams(ParamDIE, cast<MDTuple>(Val)); } } } -/// getOrCreateNameSpace - Create a DIE for DINameSpace. -DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) { +DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) { // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. - DIE *ContextDIE = getOrCreateContextDIE(NS.getContext()); + DIE *ContextDIE = getOrCreateContextDIE(NS->getScope()); if (DIE *NDie = getDIE(NS)) return NDie; DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS); - StringRef Name = NS.getName(); + StringRef Name = NS->getName(); if (!Name.empty()) - addString(NDie, dwarf::DW_AT_name, NS.getName()); + addString(NDie, dwarf::DW_AT_name, NS->getName()); else Name = "(anonymous namespace)"; DD->addAccelNamespace(Name, NDie); - addGlobalName(Name, NDie, NS.getContext()); + addGlobalName(Name, NDie, NS->getScope()); addSourceLine(NDie, NS); return &NDie; } -/// getOrCreateSubprogramDIE - Create new DIE using SP. -DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) { +DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) { // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE (as is the case for member function // declarations). DIE *ContextDIE = - Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP.getContext())); + Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP->getScope())); if (DIE *SPDie = getDIE(SP)) return SPDie; - if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + if (auto *SPDecl = SP->getDeclaration()) { if (!Minimal) { // Add subprogram definitions to the CU die directly. ContextDIE = &getUnitDie(); @@ -1216,36 +1121,35 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) { // Stop here and fill this in later, depending on whether or not this // subprogram turns out to have inlined instances or not. 
- if (SP.isDefinition()) + if (SP->isDefinition()) return &SPDie; applySubprogramAttributes(SP, SPDie); return &SPDie; } -bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP, +bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie) { DIE *DeclDie = nullptr; StringRef DeclLinkageName; - if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + if (auto *SPDecl = SP->getDeclaration()) { DeclDie = getDIE(SPDecl); assert(DeclDie && "This DIE should've already been constructed when the " "definition DIE was created in " "getOrCreateSubprogramDIE"); - DeclLinkageName = SPDecl.getLinkageName(); + DeclLinkageName = SPDecl->getLinkageName(); } // Add function template parameters. - addTemplateParams(SPDie, SP.getTemplateParams()); + addTemplateParams(SPDie, SP->getTemplateParams()); // Add the linkage name if we have one and it isn't in the Decl. - StringRef LinkageName = SP.getLinkageName(); + StringRef LinkageName = SP->getLinkageName(); assert(((LinkageName.empty() || DeclLinkageName.empty()) || LinkageName == DeclLinkageName) && "decl has a linkage name and it is different"); - if (!LinkageName.empty() && DeclLinkageName.empty()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - GlobalValue::getRealLinkageName(LinkageName)); + if (DeclLinkageName.empty()) + addLinkageName(SPDie, LinkageName); if (!DeclDie) return false; @@ -1256,15 +1160,15 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP, return true; } -void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, +void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, bool Minimal) { if (!Minimal) if (applySubprogramDefinitionAttributes(SP, SPDie)) return; // Constructors and operators for anonymous aggregates do not have names. - if (!SP.getName().empty()) - addString(SPDie, dwarf::DW_AT_name, SP.getName()); + if (!SP->getName().empty()) + addString(SPDie, dwarf::DW_AT_name, SP->getName()); // Skip the rest of the attributes under -gmlt to save space. if (Minimal) @@ -1275,33 +1179,34 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, // Add the prototype if we have a prototype and we have a C like // language. uint16_t Language = getLanguage(); - if (SP.isPrototyped() && + if (SP->isPrototyped() && (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) addFlag(SPDie, dwarf::DW_AT_prototyped); - DISubroutineType SPTy = SP.getType(); - assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type && + const DISubroutineType *SPTy = SP->getType(); + assert(SPTy->getTag() == dwarf::DW_TAG_subroutine_type && "the type of a subprogram should be a subroutine"); - DITypeArray Args = SPTy.getTypeArray(); + auto Args = SPTy->getTypeArray(); // Add a return type. If this is a type like a C/C++ void type we don't add a // return type. 
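The linkage-name handling above funnels through the new addLinkageName helper, which picks the attribute by DWARF version: version 4 standardized DW_AT_linkage_name, while older consumers only understand the vendor attribute it replaced. The selection, isolated as a sketch:

    #include "llvm/Support/Dwarf.h"

    using namespace llvm;

    // Version-gated attribute choice, mirroring addLinkageName above.
    dwarf::Attribute linkageNameAttr(unsigned DwarfVersion) {
      return DwarfVersion >= 4 ? dwarf::DW_AT_linkage_name
                               : dwarf::DW_AT_MIPS_linkage_name;
    }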
- if (resolve(Args.getElement(0))) - addType(SPDie, DIType(resolve(Args.getElement(0)))); + if (Args.size()) + if (auto Ty = resolve(Args[0])) + addType(SPDie, Ty); - unsigned VK = SP.getVirtuality(); + unsigned VK = SP->getVirtuality(); if (VK) { addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); DIELoc *Block = getDIELoc(); addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(*Block, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex()); addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block); ContainingTypeMap.insert( - std::make_pair(&SPDie, resolve(SP.getContainingType()))); + std::make_pair(&SPDie, resolve(SP->getContainingType()))); } - if (!SP.isDefinition()) { + if (!SP->isDefinition()) { addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments. Do not add arguments for subprogram definition. They will @@ -1309,41 +1214,40 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, constructSubprogramArguments(SPDie, Args); } - if (SP.isArtificial()) + if (SP->isArtificial()) addFlag(SPDie, dwarf::DW_AT_artificial); - if (!SP.isLocalToUnit()) + if (!SP->isLocalToUnit()) addFlag(SPDie, dwarf::DW_AT_external); - if (SP.isOptimized()) + if (SP->isOptimized()) addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); - if (unsigned isa = Asm->getISAEncoding()) { + if (unsigned isa = Asm->getISAEncoding()) addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); - } - if (SP.isLValueReference()) + if (SP->isLValueReference()) addFlag(SPDie, dwarf::DW_AT_reference); - if (SP.isRValueReference()) + if (SP->isRValueReference()) addFlag(SPDie, dwarf::DW_AT_rvalue_reference); - if (SP.isProtected()) + if (SP->isProtected()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); - else if (SP.isPrivate()) + else if (SP->isPrivate()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); - else if (SP.isPublic()) + else if (SP->isPublic()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); - if (SP.isExplicit()) + if (SP->isExplicit()) addFlag(SPDie, dwarf::DW_AT_explicit); } -/// constructSubrangeDIE - Construct subrange DIE from DISubrange. -void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { +void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, + DIE *IndexTy) { DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer); addDIEEntry(DW_Subrange, dwarf::DW_AT_type, *IndexTy); @@ -1351,9 +1255,9 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { // C/C++. The Count value is the number of elements. Values are 64 bit. If // Count == -1 then the array is unbounded and we do not emit // DW_AT_lower_bound and DW_AT_count attributes. - int64_t LowerBound = SR.getLo(); + int64_t LowerBound = SR->getLowerBound(); int64_t DefaultLowerBound = getDefaultLowerBound(); - int64_t Count = SR.getCount(); + int64_t Count = SR->getCount(); if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound); @@ -1376,13 +1280,12 @@ DIE *DwarfUnit::getIndexTyDie() { return IndexTyDie; } -/// constructArrayTypeDIE - Construct array type DIE from DICompositeType. 
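Reading the subrange logic above with concrete numbers: for a C array 'int a[10]' the front end records LowerBound = 0 and Count = 10; C's default lower bound is also 0, so only DW_AT_count is emitted, while Count == -1 (an unbounded array) suppresses both attributes per the comment in the hunk. A small sketch of the decision:

    #include <cstdint>

    // Sketch of the attribute selection above for 'int a[10]' in C.
    static void subrangeAttrs(bool &EmitLowerBound, bool &EmitCount) {
      int64_t LowerBound = 0;        // SR->getLowerBound()
      int64_t Count = 10;            // SR->getCount(); -1 marks unbounded
      int64_t DefaultLowerBound = 0; // getDefaultLowerBound() for C
      EmitLowerBound =
          DefaultLowerBound == -1 || LowerBound != DefaultLowerBound; // false
      EmitCount = Count != -1;                                        // true
    }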
-void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { - if (CTy.isVector()) +void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) { + if (CTy->isVector()) addFlag(Buffer, dwarf::DW_AT_GNU_vector); // Emit the element type. - addType(Buffer, resolve(CTy.getTypeDerivedFrom())); + addType(Buffer, resolve(CTy->getBaseType())); // Get an anonymous type for index type. // FIXME: This type should be passed down from the front end @@ -1390,45 +1293,42 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { DIE *IdxTy = getIndexTyDie(); // Add subranges to array type. - DIArray Elements = CTy.getElements(); - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.getTag() == dwarf::DW_TAG_subrange_type) - constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy); + DINodeArray Elements = CTy->getElements(); + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + // FIXME: Should this really be such a loose cast? + if (auto *Element = dyn_cast_or_null<DINode>(Elements[i])) + if (Element->getTag() == dwarf::DW_TAG_subrange_type) + constructSubrangeDIE(Buffer, cast<DISubrange>(Element), IdxTy); } } -/// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType. -void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) { - DIArray Elements = CTy.getElements(); +void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { + DINodeArray Elements = CTy->getElements(); // Add enumerators to enumeration type. - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIEnumerator Enum(Elements.getElement(i)); - if (Enum.isEnumerator()) { + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + auto *Enum = dyn_cast_or_null<DIEnumerator>(Elements[i]); + if (Enum) { DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer); - StringRef Name = Enum.getName(); + StringRef Name = Enum->getName(); addString(Enumerator, dwarf::DW_AT_name, Name); - int64_t Value = Enum.getEnumValue(); + int64_t Value = Enum->getValue(); addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); } } - DIType DTy = resolve(CTy.getTypeDerivedFrom()); + const DIType *DTy = resolve(CTy->getBaseType()); if (DTy) { addType(Buffer, DTy); addFlag(Buffer, dwarf::DW_AT_enum_class); } } -/// constructContainingTypeDIEs - Construct DIEs for types that contain -/// vtables. void DwarfUnit::constructContainingTypeDIEs() { - for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(), - CE = ContainingTypeMap.end(); + for (auto CI = ContainingTypeMap.begin(), CE = ContainingTypeMap.end(); CI != CE; ++CI) { DIE &SPDie = *CI->first; - DIDescriptor D(CI->second); + const DINode *D = CI->second; if (!D) continue; DIE *NDie = getDIE(D); @@ -1438,18 +1338,17 @@ void DwarfUnit::constructContainingTypeDIEs() { } } -/// constructMemberDIE - Construct member DIE from DIDerivedType. 
-void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { - DIE &MemberDie = createAndAddDIE(DT.getTag(), Buffer); - StringRef Name = DT.getName(); +void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { + DIE &MemberDie = createAndAddDIE(DT->getTag(), Buffer); + StringRef Name = DT->getName(); if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, Name); - addType(MemberDie, resolve(DT.getTypeDerivedFrom())); + addType(MemberDie, resolve(DT->getBaseType())); addSourceLine(MemberDie, DT); - if (DT.getTag() == dwarf::DW_TAG_inheritance && DT.isVirtual()) { + if (DT->getTag() == dwarf::DW_TAG_inheritance && DT->isVirtual()) { // For C++, virtual base classes are not at fixed offset. Use following // expression to extract appropriate offset from vtable. @@ -1459,14 +1358,14 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits()); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT->getOffsetInBits()); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie); } else { - uint64_t Size = DT.getSizeInBits(); + uint64_t Size = DT->getSizeInBits(); uint64_t FieldSize = getBaseTypeSize(DD, DT); uint64_t OffsetInBytes; @@ -1475,8 +1374,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8); addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size); - uint64_t Offset = DT.getOffsetInBits(); - uint64_t AlignMask = ~(DT.getAlignInBits() - 1); + uint64_t Offset = DT->getOffsetInBits(); + uint64_t AlignMask = ~(DT->getAlignInBits() - 1); uint64_t HiMark = (Offset + FieldSize) & AlignMask; uint64_t FieldOffset = (HiMark - FieldSize); Offset -= FieldOffset; @@ -1491,7 +1390,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { OffsetInBytes = FieldOffset >> 3; } else // This is not a bitfield. - OffsetInBytes = DT.getOffsetInBits() >> 3; + OffsetInBytes = DT->getOffsetInBits() >> 3; if (DD->getDwarfVersion() <= 2) { DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc(); @@ -1503,49 +1402,48 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { OffsetInBytes); } - if (DT.isProtected()) + if (DT->isProtected()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); - else if (DT.isPrivate()) + else if (DT->isPrivate()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); // Otherwise C++ member and base classes are considered public. - else if (DT.isPublic()) + else if (DT->isPublic()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); - if (DT.isVirtual()) + if (DT->isVirtual()) addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, dwarf::DW_VIRTUALITY_virtual); // Objective-C properties. 
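A worked instance of the bitfield placement arithmetic above (layout assumed): a 5-bit member recorded at bit offset 35 within 32-bit storage units falls in the second unit, so the member lands at byte offset 4 with bit offset 3 inside its unit:

    #include <cstdint>

    // Worked numbers for the math in constructMemberDIE above.
    static void bitfieldPlacement() {
      uint64_t Offset = 35;      // DT->getOffsetInBits()
      uint64_t FieldSize = 32;   // bit size of the underlying storage type
      uint64_t AlignInBits = 32; // DT->getAlignInBits()
      uint64_t AlignMask = ~(AlignInBits - 1);            // low 5 bits cleared
      uint64_t HiMark = (Offset + FieldSize) & AlignMask; // 64
      uint64_t FieldOffset = HiMark - FieldSize;          // 32
      Offset -= FieldOffset;     // 3: bit offset inside the storage unit
      uint64_t OffsetInBytes = FieldOffset >> 3; // 4: DW_AT_data_member_location
      (void)Offset; (void)OffsetInBytes;
    }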
- if (MDNode *PNode = DT.getObjCProperty()) + if (MDNode *PNode = DT->getObjCProperty()) if (DIEEntry *PropertyDie = getDIEEntry(PNode)) MemberDie.addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4, PropertyDie); - if (DT.isArtificial()) + if (DT->isArtificial()) addFlag(MemberDie, dwarf::DW_AT_artificial); } -/// getOrCreateStaticMemberDIE - Create new DIE for C++ static member. -DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { - if (!DT.Verify()) +DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { + if (!DT) return nullptr; // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. - DIE *ContextDIE = getOrCreateContextDIE(resolve(DT.getContext())); + DIE *ContextDIE = getOrCreateContextDIE(resolve(DT->getScope())); assert(dwarf::isType(ContextDIE->getTag()) && "Static member should belong to a type."); if (DIE *StaticMemberDIE = getDIE(DT)) return StaticMemberDIE; - DIE &StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT); + DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT); - DIType Ty = resolve(DT.getTypeDerivedFrom()); + const DIType *Ty = resolve(DT->getBaseType()); - addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName()); + addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName()); addType(StaticMemberDIE, Ty); addSourceLine(StaticMemberDIE, DT); addFlag(StaticMemberDIE, dwarf::DW_AT_external); @@ -1553,57 +1451,59 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { // FIXME: We could omit private if the parent is a class_type, and // public if the parent is something else. - if (DT.isProtected()) + if (DT->isProtected()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); - else if (DT.isPrivate()) + else if (DT->isPrivate()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); - else if (DT.isPublic()) + else if (DT->isPublic()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); - if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant())) + if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT->getConstant())) addConstantValue(StaticMemberDIE, CI, Ty); - if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant())) + if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant())) addConstantFPValue(StaticMemberDIE, CFP); return &StaticMemberDIE; } -void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const { +void DwarfUnit::emitHeader(bool UseOffsets) { // Emit size of content not including length itself - Asm->OutStreamer.AddComment("Length of Unit"); + Asm->OutStreamer->AddComment("Length of Unit"); Asm->EmitInt32(getHeaderSize() + UnitDie.getSize()); - Asm->OutStreamer.AddComment("DWARF version number"); + Asm->OutStreamer->AddComment("DWARF version number"); Asm->EmitInt16(DD->getDwarfVersion()); - Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); + Asm->OutStreamer->AddComment("Offset Into Abbrev. Section"); + // We share one abbreviations table across all units so it's always at the // start of the section. Use a relocatable offset where needed to ensure // linking doesn't invalidate that offset. 
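The rewritten emitHeader above (its abbreviation-offset branch continues just below) writes the standard DWARF32 compile-unit header. A layout sketch only; the real code emits field by field, and the on-disk form has no struct padding:

    #include <cstdint>

    // Field order and sizes written by DwarfUnit::emitHeader(bool UseOffsets).
    struct CompileUnitHeaderSketch {
      uint32_t UnitLength;   // getHeaderSize() + UnitDie.getSize(); excludes itself
      uint16_t Version;      // DD->getDwarfVersion()
      uint32_t AbbrevOffset; // relocatable section offset, or plain 0 (see below)
      uint8_t  AddressSize;  // pointer size from the data layout
    };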
- if (ASectionSym) - Asm->EmitSectionOffset(ASectionSym, ASectionSym); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + if (!UseOffsets) + Asm->emitSectionOffset(TLOF.getDwarfAbbrevSection()->getBeginSymbol()); else - // Use a constant value when no symbol is provided. Asm->EmitInt32(0); - Asm->OutStreamer.AddComment("Address Size (in bytes)"); + + Asm->OutStreamer->AddComment("Address Size (in bytes)"); Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); } -void DwarfUnit::initSection(const MCSection *Section) { +void DwarfUnit::initSection(MCSection *Section) { assert(!this->Section); this->Section = Section; } -void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const { - DwarfUnit::emitHeader(ASectionSym); - Asm->OutStreamer.AddComment("Type Signature"); - Asm->OutStreamer.EmitIntValue(TypeSignature, sizeof(TypeSignature)); - Asm->OutStreamer.AddComment("Type DIE Offset"); +void DwarfTypeUnit::emitHeader(bool UseOffsets) { + DwarfUnit::emitHeader(UseOffsets); + Asm->OutStreamer->AddComment("Type Signature"); + Asm->OutStreamer->EmitIntValue(TypeSignature, sizeof(TypeSignature)); + Asm->OutStreamer->AddComment("Type DIE Offset"); // In a skeleton type unit there is no type DIE so emit a zero offset. - Asm->OutStreamer.EmitIntValue(Ty ? Ty->getOffset() : 0, - sizeof(Ty->getOffset())); + Asm->OutStreamer->EmitIntValue(Ty ? Ty->getOffset() : 0, + sizeof(Ty->getOffset())); } bool DwarfTypeUnit::isDwoUnit() const { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 7a5e47d..0d01a9e 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -63,15 +63,15 @@ public: }; //===----------------------------------------------------------------------===// -/// Unit - This dwarf writer support class manages information associated -/// with a source file. +/// This dwarf writer support class manages information associated with a +/// source file. class DwarfUnit { protected: - /// UniqueID - a numeric ID unique among all CUs in the module + /// A numeric ID unique among all CUs in the module unsigned UniqueID; - /// Node - MDNode for the compile unit. - DICompileUnit CUNode; + /// MDNode for the compile unit. + const DICompileUnit *CUNode; /// Unit debug information entry. DIE UnitDie; @@ -79,60 +79,62 @@ protected: /// Offset of the UnitDie from beginning of debug info section. unsigned DebugInfoOffset; - /// Asm - Target of Dwarf emission. + /// Target of Dwarf emission. AsmPrinter *Asm; // Holders for some common dwarf information. DwarfDebug *DD; DwarfFile *DU; - /// IndexTyDie - An anonymous type for index type. Owned by UnitDie. + /// An anonymous type for index type. Owned by UnitDie. DIE *IndexTyDie; - /// MDNodeToDieMap - Tracks the mapping of unit level debug information - /// variables to debug information entries. + /// Tracks the mapping of unit level debug information variables to debug + /// information entries. DenseMap<const MDNode *, DIE *> MDNodeToDieMap; - /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information - /// descriptors to debug information entries using a DIEEntry proxy. + /// Tracks the mapping of unit level debug information descriptors to debug + /// information entries using a DIEEntry proxy. DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; - /// DIEBlocks - A list of all the DIEBlocks in use. + /// A list of all the DIEBlocks in use. 
std::vector<DIEBlock *> DIEBlocks; - - /// DIELocs - A list of all the DIELocs in use. + + /// A list of all the DIELocs in use. std::vector<DIELoc *> DIELocs; - /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that - /// need DW_AT_containing_type attribute. This attribute points to a DIE that + /// This map is used to keep track of subprogram DIEs that need + /// DW_AT_containing_type attribute. This attribute points to a DIE that /// corresponds to the MDNode mapped with the subprogram DIE. - DenseMap<DIE *, const MDNode *> ContainingTypeMap; + DenseMap<DIE *, const DINode *> ContainingTypeMap; - // DIEValueAllocator - All DIEValues are allocated through this allocator. + // All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; - // DIEIntegerOne - A preallocated DIEValue because 1 is used frequently. + // A preallocated DIEValue because 1 is used frequently. DIEInteger *DIEIntegerOne; /// The section this unit will be emitted in. - const MCSection *Section; + MCSection *Section; - DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A, + DwarfUnit(unsigned UID, dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); - void initSection(const MCSection *Section); - - /// Add a string attribute data and value. + /// \brief Add a string attribute data and value. + /// + /// This is guaranteed to be in the local string pool instead of indirected. void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); void addIndexedString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); - bool applySubprogramDefinitionAttributes(DISubprogram SP, DIE &SPDie); + bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie); public: virtual ~DwarfUnit(); - const MCSection *getSection() const { + void initSection(MCSection *Section); + + MCSection *getSection() const { assert(Section); return Section; } @@ -140,119 +142,133 @@ public: // Accessors. AsmPrinter* getAsmPrinter() const { return Asm; } unsigned getUniqueID() const { return UniqueID; } - uint16_t getLanguage() const { return CUNode.getLanguage(); } - DICompileUnit getCUNode() const { return CUNode; } + uint16_t getLanguage() const { return CUNode->getSourceLanguage(); } + const DICompileUnit *getCUNode() const { return CUNode; } DIE &getUnitDie() { return UnitDie; } unsigned getDebugInfoOffset() const { return DebugInfoOffset; } void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } - /// hasContent - Return true if this compile unit has something to write out. + /// \brief Return true if this compile unit has something to write out. bool hasContent() const { return !UnitDie.getChildren().empty(); } - /// getParentContextString - Get a string containing the language specific - /// context for a global name. - std::string getParentContextString(DIScope Context) const; + /// \brief Get string containing language specific context for a global name. + /// + /// Walks the metadata parent chain in a language specific manner (using the + /// compile unit language) and returns it as a string. This is done at the + /// metadata level because DIEs may not currently have been added to the + /// parent context and walking the DIEs looking for names is more expensive + /// than walking the metadata. + std::string getParentContextString(const DIScope *Context) const; /// Add a new global name to the compile unit. 
- virtual void addGlobalName(StringRef Name, DIE &Die, DIScope Context) {} + virtual void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) { + } /// Add a new global type to the compile unit. - virtual void addGlobalType(DIType Ty, const DIE &Die, DIScope Context) {} + virtual void addGlobalType(const DIType *Ty, const DIE &Die, + const DIScope *Context) {} - /// addAccelNamespace - Add a new name to the namespace accelerator table. + /// \brief Add a new name to the namespace accelerator table. void addAccelNamespace(StringRef Name, const DIE &Die); - /// getDIE - Returns the debug information entry map slot for the - /// specified debug variable. We delegate the request to DwarfDebug - /// when the MDNode can be part of the type system, since DIEs for - /// the type system can be shared across CUs and the mappings are - /// kept in DwarfDebug. - DIE *getDIE(DIDescriptor D) const; + /// \brief Returns the DIE map slot for the specified debug variable. + /// + /// We delegate the request to DwarfDebug when the MDNode can be part of the + /// type system, since DIEs for the type system can be shared across CUs and + /// the mappings are kept in DwarfDebug. + DIE *getDIE(const DINode *D) const; - /// getDIELoc - Returns a fresh newly allocated DIELoc. + /// \brief Returns a fresh newly allocated DIELoc. DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc(); } - /// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug - /// when the MDNode can be part of the type system, since DIEs for - /// the type system can be shared across CUs and the mappings are - /// kept in DwarfDebug. - void insertDIE(DIDescriptor Desc, DIE *D); + /// \brief Insert DIE into the map. + /// + /// We delegate the request to DwarfDebug when the MDNode can be part of the + /// type system, since DIEs for the type system can be shared across CUs and + /// the mappings are kept in DwarfDebug. + void insertDIE(const DINode *Desc, DIE *D); - /// addFlag - Add a flag that is true to the DIE. + /// \brief Add a flag that is true to the DIE. void addFlag(DIE &Die, dwarf::Attribute Attribute); - /// addUInt - Add an unsigned integer attribute data and value. + /// \brief Add an unsigned integer attribute data and value. void addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, uint64_t Integer); void addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer); - /// addSInt - Add an signed integer attribute data and value. + /// \brief Add an signed integer attribute data and value. void addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, int64_t Integer); void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer); - /// addString - Add a string attribute data and value. + /// \brief Add a string attribute data and value. + /// + /// We always emit a reference to the string pool instead of immediate + /// strings so that DIEs have more predictable sizes. In the case of split + /// dwarf we emit an index into another table which gets us the static offset + /// into the string table. void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); - /// addLabel - Add a Dwarf label attribute data and value. + /// \brief Add a Dwarf label attribute data and value. void addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Label); void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label); - /// addSectionOffset - Add an offset into a section attribute data and value. 
- /// + /// \brief Add an offset into a section attribute data and value. void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer); - /// addOpAddress - Add a dwarf op address data and value using the - /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. + /// \brief Add a dwarf op address data and value using the form given and an + /// op of either DW_FORM_addr or DW_FORM_GNU_addr_index. void addOpAddress(DIELoc &Die, const MCSymbol *Label); - /// addLabelDelta - Add a label delta attribute data and value. + /// \brief Add a label delta attribute data and value. void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo); - /// addDIEEntry - Add a DIE attribute data and value. + /// \brief Add a DIE attribute data and value. void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry); - /// addDIEEntry - Add a DIE attribute data and value. + /// \brief Add a DIE attribute data and value. void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry *Entry); void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type); - /// addBlock - Add block data. + /// \brief Add block data. void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block); - /// addBlock - Add block data. + /// \brief Add block data. void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block); - /// addSourceLine - Add location information to specified debug information - /// entry. + /// \brief Add location information to specified debug information entry. void addSourceLine(DIE &Die, unsigned Line, StringRef File, StringRef Directory); - void addSourceLine(DIE &Die, DIVariable V); - void addSourceLine(DIE &Die, DIGlobalVariable G); - void addSourceLine(DIE &Die, DISubprogram SP); - void addSourceLine(DIE &Die, DIType Ty); - void addSourceLine(DIE &Die, DINameSpace NS); - void addSourceLine(DIE &Die, DIObjCProperty Ty); - - /// addConstantValue - Add constant value entry in variable DIE. - void addConstantValue(DIE &Die, const MachineOperand &MO, DIType Ty); - void addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty); - void addConstantValue(DIE &Die, const APInt &Val, DIType Ty); + void addSourceLine(DIE &Die, const DILocalVariable *V); + void addSourceLine(DIE &Die, const DIGlobalVariable *G); + void addSourceLine(DIE &Die, const DISubprogram *SP); + void addSourceLine(DIE &Die, const DIType *Ty); + void addSourceLine(DIE &Die, const DINamespace *NS); + void addSourceLine(DIE &Die, const DIObjCProperty *Ty); + + /// \brief Add constant value entry in variable DIE. + void addConstantValue(DIE &Die, const MachineOperand &MO, const DIType *Ty); + void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty); + void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty); void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned); void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val); - /// addConstantFPValue - Add constant value entry in variable DIE. + /// \brief Add constant value entry in variable DIE. void addConstantFPValue(DIE &Die, const MachineOperand &MO); void addConstantFPValue(DIE &Die, const ConstantFP *CFP); - /// addTemplateParams - Add template parameters in buffer. - void addTemplateParams(DIE &Buffer, DIArray TParams); + /// \brief Add a linkage name, if it isn't empty. + void addLinkageName(DIE &Die, StringRef LinkageName); + + /// \brief Add template parameters in buffer. 
+ void addTemplateParams(DIE &Buffer, DINodeArray TParams); /// \brief Add register operand. /// \returns false if the register does not exist, e.g., because it was never @@ -266,51 +282,45 @@ public: bool addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset); // FIXME: Should be reformulated in terms of addComplexAddress. - /// addBlockByrefAddress - Start with the address based on the location - /// provided, and generate the DWARF information necessary to find the - /// actual Block variable (navigating the Block struct) based on the - /// starting location. Add the DWARF information to the die. Obsolete, - /// please use addComplexAddress instead. + /// Start with the address based on the location provided, and generate the + /// DWARF information necessary to find the actual Block variable (navigating + /// the Block struct) based on the starting location. Add the DWARF + /// information to the die. Obsolete, please use addComplexAddress instead. void addBlockByrefAddress(const DbgVariable &DV, DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location); - /// addType - Add a new type attribute to the specified entity. This takes - /// and attribute parameter because DW_AT_friend attributes are also - /// type references. - void addType(DIE &Entity, DIType Ty, + /// \brief Add a new type attribute to the specified entity. + /// + /// This takes and attribute parameter because DW_AT_friend attributes are + /// also type references. + void addType(DIE &Entity, const DIType *Ty, dwarf::Attribute Attribute = dwarf::DW_AT_type); - /// getOrCreateNameSpace - Create a DIE for DINameSpace. - DIE *getOrCreateNameSpace(DINameSpace NS); - - /// getOrCreateSubprogramDIE - Create new DIE using SP. - DIE *getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal = false); + DIE *getOrCreateNameSpace(const DINamespace *NS); + DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal = false); - void applySubprogramAttributes(DISubprogram SP, DIE &SPDie, + void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, bool Minimal = false); - /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the - /// given DIType. + /// \brief Find existing DIE or create new DIE for the given type. DIE *getOrCreateTypeDIE(const MDNode *N); - /// getOrCreateContextDIE - Get context owner's DIE. - DIE *createTypeDIE(DICompositeType Ty); + /// \brief Get context owner's DIE. + DIE *createTypeDIE(const DICompositeType *Ty); - /// getOrCreateContextDIE - Get context owner's DIE. - DIE *getOrCreateContextDIE(DIScope Context); + /// \brief Get context owner's DIE. + DIE *getOrCreateContextDIE(const DIScope *Context); - /// constructContainingTypeDIEs - Construct DIEs for types that contain - /// vtables. + /// \brief Construct DIEs for types that contain vtables. void constructContainingTypeDIEs(); - /// constructSubprogramArguments - Construct function argument DIEs. - void constructSubprogramArguments(DIE &Buffer, DITypeArray Args); + /// \brief Construct function argument DIEs. + void constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args); /// Create a DIE with the given Tag, add the DIE to its parent, and /// call insertDIE if MD is not null. - DIE &createAndAddDIE(unsigned Tag, DIE &Parent, - DIDescriptor N = DIDescriptor()); + DIE &createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N = nullptr); /// Compute the size of a header for this unit, not including the initial /// length field. 
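On addType above: the comment means it takes an attribute parameter because DW_AT_friend attributes are type references too, so one helper serves both cases. A hypothetical caller, purely illustrative:

    // Hypothetical member function: the default argument covers the common
    // DW_AT_type case, the explicit form covers friend declarations.
    void DwarfUnit::addFriendRef(DIE &FriendDie, const DIType *FriendTy) {
      addType(FriendDie, FriendTy, dwarf::DW_AT_friend);
    }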
@@ -321,85 +331,68 @@ public: } /// Emit the header for this unit, not including the initial length field. - virtual void emitHeader(const MCSymbol *ASectionSym) const; + virtual void emitHeader(bool UseOffsets); virtual DwarfCompileUnit &getCU() = 0; - /// constructTypeDIE - Construct type DIE from DICompositeType. - void constructTypeDIE(DIE &Buffer, DICompositeType CTy); + void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); protected: - /// getOrCreateStaticMemberDIE - Create new static data member DIE. - DIE *getOrCreateStaticMemberDIE(DIDerivedType DT); + /// \brief Create new static data member DIE. + DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT); /// Look up the source ID with the given directory and source file names. If /// none currently exists, create a new ID and insert it in the line table. virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0; - /// resolve - Look in the DwarfDebug map for the MDNode that - /// corresponds to the reference. - template <typename T> T resolve(DIRef<T> Ref) const { + /// \brief Look in the DwarfDebug map for the MDNode that corresponds to the + /// reference. + template <typename T> T *resolve(TypedDINodeRef<T> Ref) const { return DD->resolve(Ref); } private: - /// constructTypeDIE - Construct basic type die from DIBasicType. - void constructTypeDIE(DIE &Buffer, DIBasicType BTy); - - /// constructTypeDIE - Construct derived type die from DIDerivedType. - void constructTypeDIE(DIE &Buffer, DIDerivedType DTy); - - /// constructSubrangeDIE - Construct subrange DIE from DISubrange. - void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); - - /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. - void constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy); - - /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. - void constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy); - - /// constructMemberDIE - Construct member DIE from DIDerivedType. - void constructMemberDIE(DIE &Buffer, DIDerivedType DT); - - /// constructTemplateTypeParameterDIE - Construct new DIE for the given - /// DITemplateTypeParameter. + void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy); + void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy); + void constructTypeDIE(DIE &Buffer, const DISubroutineType *DTy); + void constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, DIE *IndexTy); + void constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy); + void constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy); + void constructMemberDIE(DIE &Buffer, const DIDerivedType *DT); void constructTemplateTypeParameterDIE(DIE &Buffer, - DITemplateTypeParameter TP); - - /// constructTemplateValueParameterDIE - Construct new DIE for the given - /// DITemplateValueParameter. + const DITemplateTypeParameter *TP); void constructTemplateValueParameterDIE(DIE &Buffer, - DITemplateValueParameter TVP); + const DITemplateValueParameter *TVP); - /// getLowerBoundDefault - Return the default lower bound for an array. If the - /// DWARF version doesn't handle the language, return -1. + /// \brief Return the default lower bound for an array. + /// + /// If the DWARF version doesn't handle the language, return -1. int64_t getDefaultLowerBound() const; - /// getDIEEntry - Returns the debug information entry for the specified - /// debug variable. + /// \brief Returns the DIE entry for the specified debug variable. 
DIEEntry *getDIEEntry(const MDNode *N) const { return MDNodeToDIEEntryMap.lookup(N); } - /// insertDIEEntry - Insert debug information entry into the map. + /// \brief Insert debug information entry into the map. void insertDIEEntry(const MDNode *N, DIEEntry *E) { MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); } - // getIndexTyDie - Get an anonymous type for index type. + /// \brief Get an anonymous type for index type. DIE *getIndexTyDie(); - // setIndexTyDie - Set D as anonymous type for index which can be reused - // later. + /// \brief Set D as anonymous type for index which can be reused later. void setIndexTyDie(DIE *D) { IndexTyDie = D; } - /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug - /// information entry. + /// \brief Creates a new DIEEntry to be a proxy for a debug information + /// entry. DIEEntry *createDIEEntry(DIE &Entry); /// If this is a named finished type then include it in the list of types for /// the accelerator tables. - void updateAcceleratorTables(DIScope Context, DIType Ty, const DIE &TyDIE); + void updateAcceleratorTables(const DIScope *Context, const DIType *Ty, + const DIE &TyDIE); virtual bool isDwoUnit() const = 0; }; @@ -423,12 +416,11 @@ public: void setType(const DIE *Ty) { this->Ty = Ty; } /// Emit the header for this unit, not including the initial length field. - void emitHeader(const MCSymbol *ASectionSym) const override; + void emitHeader(bool UseOffsets) override; unsigned getHeaderSize() const override { return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature sizeof(uint32_t); // Type DIE Offset } - using DwarfUnit::initSection; DwarfCompileUnit &getCU() override { return CU; } }; } // end llvm namespace diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index f112120..1be3fd7 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -121,7 +121,8 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) { int TypeID = TypeIds[J]; assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); - int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; + int ValueForTypeID = + isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID; unsigned SizeTypeID = getSLEB128Size(ValueForTypeID); int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0; @@ -187,20 +188,12 @@ bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) { return MarkedNoUnwind; } -/// Compute the call-site table. The entry for an invoke has a try-range -/// containing the call, a non-zero landing pad, and an appropriate action. The -/// entry for an ordinary call has a try-range containing the call and zero for -/// the landing pad and the action. Calls marked 'nounwind' have no entry and -/// must not be contained in the try-range of any entry - they form gaps in the -/// table. Entries must be ordered by try-range address. -void EHStreamer:: -computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, - const SmallVectorImpl<const LandingPadInfo *> &LandingPads, - const SmallVectorImpl<unsigned> &FirstActions) { +void EHStreamer::computePadMap( + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + RangeMapType &PadMap) { // Invokes and nounwind calls have entries in PadMap (due to being bracketed // by try-range labels when lowered). 
Ordinary calls do not, so appropriate // try-ranges for them need be deduced so we can put them in the LSDA. - RangeMapType PadMap; for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { const LandingPadInfo *LandingPad = LandingPads[i]; for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { @@ -210,6 +203,20 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, PadMap[BeginLabel] = P; } } +} + +/// Compute the call-site table. The entry for an invoke has a try-range +/// containing the call, a non-zero landing pad, and an appropriate action. The +/// entry for an ordinary call has a try-range containing the call and zero for +/// the landing pad and the action. Calls marked 'nounwind' have no entry and +/// must not be contained in the try-range of any entry - they form gaps in the +/// table. Entries must be ordered by try-range address. +void EHStreamer:: +computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + const SmallVectorImpl<unsigned> &FirstActions) { + RangeMapType PadMap; + computePadMap(LandingPads, PadMap); // The end label of the previous invoke or nounwind try-range. MCSymbol *LastLabel = nullptr; @@ -252,7 +259,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // instruction between the previous try-range and this one may throw, // create a call-site entry with no landing pad for the region between the // try-ranges. - if (SawPotentiallyThrowing && !IsSJLJ) { + if (SawPotentiallyThrowing && Asm->MAI->usesCFIForEH()) { CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 }; CallSites.push_back(Site); PreviousIsInvoke = false; @@ -269,14 +276,14 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, CallSiteEntry Site = { BeginLabel, LastLabel, - LandingPad->LandingPadLabel, + LandingPad, FirstActions[P.PadIndex] }; // Try to merge with the previous call-site. SJLJ doesn't do this if (PreviousIsInvoke && !IsSJLJ) { CallSiteEntry &Prev = CallSites.back(); - if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { + if (Site.LPad == Prev.LPad && Site.Action == Prev.Action) { // Extend the range of the previous entry. Prev.EndLabel = Site.EndLabel; continue; @@ -381,7 +388,7 @@ void EHStreamer::emitExceptionTable() { } // Type infos. - const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection(); + MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection(); unsigned TTypeEncoding; unsigned TypeFormatSize; @@ -427,20 +434,15 @@ void EHStreamer::emitExceptionTable() { // Sometimes we want not to emit the data into separate section (e.g. ARM // EHABI). In this case LSDASection will be NULL. if (LSDASection) - Asm->OutStreamer.SwitchSection(LSDASection); + Asm->OutStreamer->SwitchSection(LSDASection); Asm->EmitAlignment(2); // Emit the LSDA. MCSymbol *GCCETSym = - Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+ + Asm->OutContext.getOrCreateSymbol(Twine("GCC_except_table")+ Twine(Asm->getFunctionNumber())); - Asm->OutStreamer.EmitLabel(GCCETSym); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception", - Asm->getFunctionNumber())); - - if (IsSJLJ) - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_", - Asm->getFunctionNumber())); + Asm->OutStreamer->EmitLabel(GCCETSym); + Asm->OutStreamer->EmitLabel(Asm->getCurExceptionSym()); // Emit the LSDA header. 
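One behavioral change in the call-site hunk above: a gap record (a call-site entry with a null landing pad, covering potentially-throwing code between try-ranges) is now emitted when the target uses CFI-based exception handling, rather than simply when not using SJLJ. The condition, isolated as a sketch:

    #include "llvm/MC/MCAsmInfo.h"

    // Sketch of the rewritten gap-entry test (was: SawPotentiallyThrowing && !IsSJLJ).
    static bool wantGapEntry(bool SawPotentiallyThrowing, const llvm::MCAsmInfo &MAI) {
      return SawPotentiallyThrowing && MAI.usesCFIForEH();
    }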
Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); @@ -484,7 +486,7 @@ void EHStreamer::emitExceptionTable() { SizeAlign = 0; } - bool VerboseAsm = Asm->OutStreamer.isVerboseAsm(); + bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); // SjLj Exception handling if (IsSJLJ) { @@ -502,8 +504,8 @@ void EHStreamer::emitExceptionTable() { // Offset of the landing pad, counted in 16-byte bundles relative to the // @LPStart address. if (VerboseAsm) { - Asm->OutStreamer.AddComment(">> Call Site " + Twine(idx) + " <<"); - Asm->OutStreamer.AddComment(" On exception at call site "+Twine(idx)); + Asm->OutStreamer->AddComment(">> Call Site " + Twine(idx) + " <<"); + Asm->OutStreamer->AddComment(" On exception at call site "+Twine(idx)); } Asm->EmitULEB128(idx); @@ -512,10 +514,10 @@ void EHStreamer::emitExceptionTable() { // the action table), and 0 indicates that there are no actions. if (VerboseAsm) { if (S.Action == 0) - Asm->OutStreamer.AddComment(" Action: cleanup"); + Asm->OutStreamer->AddComment(" Action: cleanup"); else - Asm->OutStreamer.AddComment(" Action: " + - Twine((S.Action - 1) / 2 + 1)); + Asm->OutStreamer->AddComment(" Action: " + + Twine((S.Action - 1) / 2 + 1)); } Asm->EmitULEB128(S.Action); } @@ -551,40 +553,38 @@ void EHStreamer::emitExceptionTable() { I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { const CallSiteEntry &S = *I; - MCSymbol *EHFuncBeginSym = - Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); + MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin(); MCSymbol *BeginLabel = S.BeginLabel; if (!BeginLabel) BeginLabel = EHFuncBeginSym; MCSymbol *EndLabel = S.EndLabel; if (!EndLabel) - EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); - + EndLabel = Asm->getFunctionEnd(); // Offset of the call site relative to the previous call site, counted in // number of 16-byte bundles. The first call site is counted relative to // the start of the procedure fragment. if (VerboseAsm) - Asm->OutStreamer.AddComment(">> Call Site " + Twine(++Entry) + " <<"); + Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<"); Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4); if (VerboseAsm) - Asm->OutStreamer.AddComment(Twine(" Call between ") + - BeginLabel->getName() + " and " + - EndLabel->getName()); + Asm->OutStreamer->AddComment(Twine(" Call between ") + + BeginLabel->getName() + " and " + + EndLabel->getName()); Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); // Offset of the landing pad, counted in 16-byte bundles relative to the // @LPStart address. - if (!S.PadLabel) { + if (!S.LPad) { if (VerboseAsm) - Asm->OutStreamer.AddComment(" has no landing pad"); - Asm->OutStreamer.EmitIntValue(0, 4/*size*/); + Asm->OutStreamer->AddComment(" has no landing pad"); + Asm->OutStreamer->EmitIntValue(0, 4/*size*/); } else { if (VerboseAsm) - Asm->OutStreamer.AddComment(Twine(" jumps to ") + - S.PadLabel->getName()); - Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4); + Asm->OutStreamer->AddComment(Twine(" jumps to ") + + S.LPad->LandingPadLabel->getName()); + Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4); } // Offset of the first associated action record, relative to the start of @@ -592,10 +592,10 @@ void EHStreamer::emitExceptionTable() { // the action table), and 0 indicates that there are no actions. 
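The '(S.Action - 1) / 2 + 1' comment arithmetic above relies on each action-table record occupying two SLEB128 bytes (one for the type index, one for the next-action link, both typically single-byte), so record N starts at 1-based byte offset 2N - 1. Inverting that:

    // Sketch: recover the record number behind the verbose-asm comment above.
    static unsigned actionRecord(unsigned SAction) {
      return SAction == 0 ? 0            // 0 means cleanup: no record at all
                          : (SAction - 1) / 2 + 1;
    }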
if (VerboseAsm) { if (S.Action == 0) - Asm->OutStreamer.AddComment(" On action: cleanup"); + Asm->OutStreamer->AddComment(" On action: cleanup"); else - Asm->OutStreamer.AddComment(" On action: " + - Twine((S.Action - 1) / 2 + 1)); + Asm->OutStreamer->AddComment(" On action: " + + Twine((S.Action - 1) / 2 + 1)); } Asm->EmitULEB128(S.Action); } @@ -609,7 +609,7 @@ void EHStreamer::emitExceptionTable() { if (VerboseAsm) { // Emit comments that decode the action table. - Asm->OutStreamer.AddComment(">> Action Record " + Twine(++Entry) + " <<"); + Asm->OutStreamer->AddComment(">> Action Record " + Twine(++Entry) + " <<"); } // Type Filter @@ -618,13 +618,13 @@ void EHStreamer::emitExceptionTable() { // type of the catch clauses or the types in the exception specification. if (VerboseAsm) { if (Action.ValueForTypeID > 0) - Asm->OutStreamer.AddComment(" Catch TypeInfo " + - Twine(Action.ValueForTypeID)); + Asm->OutStreamer->AddComment(" Catch TypeInfo " + + Twine(Action.ValueForTypeID)); else if (Action.ValueForTypeID < 0) - Asm->OutStreamer.AddComment(" Filter TypeInfo " + - Twine(Action.ValueForTypeID)); + Asm->OutStreamer->AddComment(" Filter TypeInfo " + + Twine(Action.ValueForTypeID)); else - Asm->OutStreamer.AddComment(" Cleanup"); + Asm->OutStreamer->AddComment(" Cleanup"); } Asm->EmitSLEB128(Action.ValueForTypeID); @@ -634,10 +634,10 @@ void EHStreamer::emitExceptionTable() { // or 0 if there is no next action record. if (VerboseAsm) { if (Action.NextAction == 0) { - Asm->OutStreamer.AddComment(" No further actions"); + Asm->OutStreamer->AddComment(" No further actions"); } else { unsigned NextAction = Entry + (Action.NextAction + 1) / 2; - Asm->OutStreamer.AddComment(" Continue to action "+Twine(NextAction)); + Asm->OutStreamer->AddComment(" Continue to action "+Twine(NextAction)); } } Asm->EmitSLEB128(Action.NextAction); @@ -652,13 +652,13 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); - bool VerboseAsm = Asm->OutStreamer.isVerboseAsm(); + bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); int Entry = 0; // Emit the Catch TypeInfos. if (VerboseAsm && !TypeInfos.empty()) { - Asm->OutStreamer.AddComment(">> Catch TypeInfos <<"); - Asm->OutStreamer.AddBlankLine(); + Asm->OutStreamer->AddComment(">> Catch TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); Entry = TypeInfos.size(); } @@ -666,14 +666,14 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalValue *GV = *I; if (VerboseAsm) - Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--)); + Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); Asm->EmitTTypeReference(GV, TTypeEncoding); } // Emit the Exception Specifications. 
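The same two-byte-record assumption appears once more in the action-table comments above: Action.NextAction is a self-relative displacement in bytes, so a displacement d reaches the record (d + 1) / 2 entries ahead, which is what the verbose comment computes. Mirrored as a sketch, with the sign conventions exactly as in the hunk:

    // Sketch: absolute record number of the chained action (0 ends the chain).
    static unsigned nextActionRecord(unsigned Entry, int NextActionDisp) {
      return NextActionDisp == 0 ? 0 : Entry + (NextActionDisp + 1) / 2;
    }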
if (VerboseAsm && !FilterIds.empty()) { - Asm->OutStreamer.AddComment(">> Filter TypeInfos <<"); - Asm->OutStreamer.AddBlankLine(); + Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); Entry = 0; } for (std::vector<unsigned>::const_iterator @@ -681,26 +681,10 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { unsigned TypeID = *I; if (VerboseAsm) { --Entry; - if (TypeID != 0) - Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry)); + if (isFilterEHSelector(TypeID)) + Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry)); } Asm->EmitULEB128(TypeID); } } - -/// Emit all exception information that should come after the content. -void EHStreamer::endModule() { - llvm_unreachable("Should be implemented"); -} - -/// Gather pre-function exception information. Assumes it's being emitted -/// immediately after the function entry point. -void EHStreamer::beginFunction(const MachineFunction *MF) { - llvm_unreachable("Should be implemented"); -} - -/// Gather and emit post-function exception information. -void EHStreamer::endFunction(const MachineFunction *) { - llvm_unreachable("Should be implemented"); -} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h index e93055c..65973fa 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -23,6 +23,8 @@ class MachineModuleInfo; class MachineInstr; class MachineFunction; class AsmPrinter; +class MCSymbol; +class MCSymbolRefExpr; template <typename T> class SmallVectorImpl; @@ -60,11 +62,11 @@ protected: /// Structure describing an entry in the call-site table. struct CallSiteEntry { // The 'try-range' is BeginLabel .. EndLabel. - MCSymbol *BeginLabel; // zero indicates the start of the function. - MCSymbol *EndLabel; // zero indicates the end of the function. + MCSymbol *BeginLabel; // Null indicates the start of the function. + MCSymbol *EndLabel; // Null indicates the end of the function. - // The landing pad starts at PadLabel. - MCSymbol *PadLabel; // zero indicates that there is no landing pad. + // LPad contains the landing pad start labels. + const LandingPadInfo *LPad; // Null indicates that there is no landing pad. unsigned Action; }; @@ -78,13 +80,15 @@ protected: /// `false' otherwise. bool callToNoUnwindFunction(const MachineInstr *MI); + void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + RangeMapType &PadMap); + /// Compute the call-site table. The entry for an invoke has a try-range /// containing the call, a non-zero landing pad and an appropriate action. /// The entry for an ordinary call has a try-range containing the call and /// zero for the landing pad and the action. Calls marked 'nounwind' have /// no entry and must not be contained in the try-range of any entry - they /// form gaps in the table. Entries must be ordered by try-range address. - void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, const SmallVectorImpl<const LandingPadInfo *> &LPs, const SmallVectorImpl<unsigned> &FirstActions); @@ -112,19 +116,16 @@ protected: virtual void emitTypeInfos(unsigned TTypeEncoding); + // Helpers for identifying what kind of clause an EH typeid or selector + // corresponds to. Negative selectors are for filter clauses, the zero + // selector is for cleanups, and positive selectors are for catch clauses.
+ static bool isFilterEHSelector(int Selector) { return Selector < 0; } + static bool isCleanupEHSelector(int Selector) { return Selector == 0; } + static bool isCatchEHSelector(int Selector) { return Selector > 0; } + public: EHStreamer(AsmPrinter *A); - virtual ~EHStreamer(); - - /// Emit all exception information that should come after the content. - void endModule() override; - - /// Gather pre-function exception information. Assumes being emitted - /// immediately after the function entry point. - void beginFunction(const MachineFunction *MF) override; - - /// Gather and emit post-function exception information. - void endFunction(const MachineFunction *) override; + ~EHStreamer() override; // Unused. void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index e293acd..eb9e4c1 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -34,33 +34,30 @@ using namespace llvm; namespace { - class ErlangGCPrinter : public GCMetadataPrinter { - public: - void finishAssembly(Module &M, GCModuleInfo &Info, - AsmPrinter &AP) override; - }; - +class ErlangGCPrinter : public GCMetadataPrinter { +public: + void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; +}; } static GCMetadataPrinterRegistry::Add<ErlangGCPrinter> -X("erlang", "erlang-compatible garbage collector"); + X("erlang", "erlang-compatible garbage collector"); -void llvm::linkErlangGCPrinter() { } +void llvm::linkErlangGCPrinter() {} void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) { - MCStreamer &OS = AP.OutStreamer; - unsigned IntPtrSize = - AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); + MCStreamer &OS = *AP.OutStreamer; + unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); // Put this in a custom .note section. - AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext() - .getELFSection(".note.gc", ELF::SHT_PROGBITS, 0, - SectionKind::getDataRel())); + OS.SwitchSection( + AP.getObjFileLowering().getContext().getELFSection(".note.gc", + ELF::SHT_PROGBITS, 0)); // For each function... for (GCModuleInfo::FuncInfoVec::iterator FI = Info.funcinfo_begin(), - IE = Info.funcinfo_end(); + IE = Info.funcinfo_end(); FI != IE; ++FI) { GCFunctionInfo &MD = **FI; if (MD.getStrategy().getName() != getStrategy().getName()) @@ -91,7 +88,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, // Emit the address of the safe point. OS.AddComment("safe point address"); MCSymbol *Label = PI->Label; - AP.EmitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/); + AP.EmitLabelPlusOffset(Label /*Hi*/, 0 /*Offset*/, 4 /*Size*/); } // Stack information never change in safe points! Only print info from the @@ -104,8 +101,9 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, // Emit stack arity, i.e. the number of stacked arguments. unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6; - unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ? - MD.getFunction().arg_size() - RegisteredArgs : 0; + unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs + ? MD.getFunction().arg_size() - RegisteredArgs + : 0; OS.AddComment("stack arity"); AP.EmitInt16(StackArity); @@ -116,7 +114,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, // And for each live root... 
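Concrete numbers for the Erlang stack-arity rule above: on a 64-bit target (IntPtrSize == 8) six arguments travel in registers, so a nine-argument function records a stack arity of three; on a 32-bit target the same function records four:

    // Sketch of the arity computation above, with worked values.
    static unsigned stackArity(unsigned IntPtrSize, unsigned ArgCount) {
      unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
      return ArgCount > RegisteredArgs ? ArgCount - RegisteredArgs : 0;
    }
    // stackArity(8, 9) == 3; stackArity(4, 9) == 4.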
for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI), LE = MD.live_end(PI); - LI != LE; ++LI) { + LI != LE; ++LI) { // Emit live root's offset within the stack frame. OS.AddComment("stack index (offset / wordsize)"); AP.EmitInt16(LI->StackOffset / IntPtrSize); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index ddb14a0..802456b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -32,20 +32,17 @@ using namespace llvm; namespace { - class OcamlGCMetadataPrinter : public GCMetadataPrinter { - public: - void beginAssembly(Module &M, GCModuleInfo &Info, - AsmPrinter &AP) override; - void finishAssembly(Module &M, GCModuleInfo &Info, - AsmPrinter &AP) override; - }; - +class OcamlGCMetadataPrinter : public GCMetadataPrinter { +public: + void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; + void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; +}; } static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter> -Y("ocaml", "ocaml 3.10-compatible collector"); + Y("ocaml", "ocaml 3.10-compatible collector"); -void llvm::linkOcamlGCPrinter() { } +void llvm::linkOcamlGCPrinter() {} static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { const std::string &MId = M.getModuleIdentifier(); @@ -63,18 +60,18 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { SmallString<128> TmpStr; AP.Mang->getNameWithPrefix(TmpStr, SymName); - MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr); + MCSymbol *Sym = AP.OutContext.getOrCreateSymbol(TmpStr); - AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global); - AP.OutStreamer.EmitLabel(Sym); + AP.OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global); + AP.OutStreamer->EmitLabel(Sym); } void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) { - AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection()); EmitCamlGlobal(M, AP, "code_begin"); - AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(M, AP, "data_begin"); } @@ -96,24 +93,24 @@ void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info, /// void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) { - unsigned IntPtrSize = - AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); + unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); - AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection()); EmitCamlGlobal(M, AP, "code_end"); - AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(M, AP, "data_end"); // FIXME: Why does ocaml emit this?? 
- AP.OutStreamer.EmitIntValue(0, IntPtrSize); + AP.OutStreamer->EmitIntValue(0, IntPtrSize); - AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(M, AP, "frametable"); int NumDescriptors = 0; for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), - IE = Info.funcinfo_end(); I != IE; ++I) { + IE = Info.funcinfo_end(); + I != IE; ++I) { GCFunctionInfo &FI = **I; if (FI.getStrategy().getName() != getStrategy().getName()) // this function is managed by some other GC @@ -123,7 +120,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, } } - if (NumDescriptors >= 1<<16) { + if (NumDescriptors >= 1 << 16) { // Very rude! report_fatal_error(" Too much descriptor for ocaml GC"); } @@ -131,45 +128,50 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), - IE = Info.funcinfo_end(); I != IE; ++I) { + IE = Info.funcinfo_end(); + I != IE; ++I) { GCFunctionInfo &FI = **I; if (FI.getStrategy().getName() != getStrategy().getName()) // this function is managed by some other GC continue; uint64_t FrameSize = FI.getFrameSize(); - if (FrameSize >= 1<<16) { + if (FrameSize >= 1 << 16) { // Very rude! report_fatal_error("Function '" + FI.getFunction().getName() + "' is too large for the ocaml GC! " - "Frame size " + Twine(FrameSize) + ">= 65536.\n" - "(" + Twine(uintptr_t(&FI)) + ")"); + "Frame size " + + Twine(FrameSize) + ">= 65536.\n" + "(" + + Twine(uintptr_t(&FI)) + ")"); } - AP.OutStreamer.AddComment("live roots for " + - Twine(FI.getFunction().getName())); - AP.OutStreamer.AddBlankLine(); + AP.OutStreamer->AddComment("live roots for " + + Twine(FI.getFunction().getName())); + AP.OutStreamer->AddBlankLine(); for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { size_t LiveCount = FI.live_size(J); - if (LiveCount >= 1<<16) { + if (LiveCount >= 1 << 16) { // Very rude! report_fatal_error("Function '" + FI.getFunction().getName() + "' is too large for the ocaml GC! " - "Live root count "+Twine(LiveCount)+" >= 65536."); + "Live root count " + + Twine(LiveCount) + " >= 65536."); } - AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize); + AP.OutStreamer->EmitSymbolValue(J->Label, IntPtrSize); AP.EmitInt16(FrameSize); AP.EmitInt16(LiveCount); for (GCFunctionInfo::live_iterator K = FI.live_begin(J), - KE = FI.live_end(J); K != KE; ++K) { - if (K->StackOffset >= 1<<16) { + KE = FI.live_end(J); + K != KE; ++K) { + if (K->StackOffset >= 1 << 16) { // Very rude! 
report_fatal_error( - "GC root stack offset is outside of fixed stack frame and out " - "of range for ocaml GC!"); + "GC root stack offset is outside of fixed stack frame and out " + "of range for ocaml GC!"); } AP.EmitInt16(K->StackOffset); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 0f0ad75..dc6df9c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" @@ -48,8 +49,6 @@ Win64Exception::~Win64Exception() {} void Win64Exception::endModule() { } -/// beginFunction - Gather pre-function exception information. Assumes it's -/// being emitted immediately after the function entry point. void Win64Exception::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; @@ -60,7 +59,7 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; + const Function *Per = MF->getMMI().getPersonality(); shouldEmitPersonality = hasLandingPads && PerEncoding != dwarf::DW_EH_PE_omit && Per; @@ -69,39 +68,437 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; + + // If this was an outlined handler, we need to define the label corresponding + // to the offset of the parent frame relative to the stack pointer after the + // prologue. + const Function *F = MF->getFunction(); + const Function *ParentF = MMI->getWinEHParent(F); + if (F != ParentF) { + WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF); + auto I = FuncInfo.CatchHandlerParentFrameObjOffset.find(F); + if (I != FuncInfo.CatchHandlerParentFrameObjOffset.end()) { + MCSymbol *HandlerTypeParentFrameOffset = + Asm->OutContext.getOrCreateParentFrameOffsetSymbol( + GlobalValue::getRealLinkageName(F->getName())); + + // Emit a symbol assignment. + Asm->OutStreamer->EmitAssignment( + HandlerTypeParentFrameOffset, + MCConstantExpr::Create(I->second, Asm->OutContext)); + } + } + if (!shouldEmitPersonality && !shouldEmitMoves) return; - Asm->OutStreamer.EmitWinCFIStartProc(Asm->CurrentFnSym); + Asm->OutStreamer->EmitWinCFIStartProc(Asm->CurrentFnSym); if (!shouldEmitPersonality) return; const MCSymbol *PersHandlerSym = TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); - Asm->OutStreamer.EmitWinEHHandler(PersHandlerSym, true, true); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); + Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true); } /// endFunction - Gather and emit post-function exception information. /// -void Win64Exception::endFunction(const MachineFunction *) { +void Win64Exception::endFunction(const MachineFunction *MF) { if (!shouldEmitPersonality && !shouldEmitMoves) return; - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", - Asm->getFunctionNumber())); + EHPersonality Per = MMI->getPersonalityType(); - // Map all labels and get rid of any dead landing pads. 
- MMI->TidyLandingPads(); + // Get rid of any dead landing pads if we're not using a Windows EH scheme. In + // Windows EH schemes, the landing pad is not actually reachable. It only + // exists so that we can emit the right table data. + if (!isMSVCEHPersonality(Per)) + MMI->TidyLandingPads(); if (shouldEmitPersonality) { - Asm->OutStreamer.PushSection(); - Asm->OutStreamer.EmitWinEHHandlerData(); - emitExceptionTable(); - Asm->OutStreamer.PopSection(); + Asm->OutStreamer->PushSection(); + + // Emit an UNWIND_INFO struct describing the prologue. + Asm->OutStreamer->EmitWinEHHandlerData(); + + // Emit the tables appropriate to the personality function in use. If we + // don't recognize the personality, assume it uses an Itanium-style LSDA. + if (Per == EHPersonality::MSVC_Win64SEH) + emitCSpecificHandlerTable(); + else if (Per == EHPersonality::MSVC_CXX) + emitCXXFrameHandler3Table(MF); + else + emitExceptionTable(); + + Asm->OutStreamer->PopSection(); + } + Asm->OutStreamer->EmitWinCFIEndProc(); +} + +const MCExpr *Win64Exception::createImageRel32(const MCSymbol *Value) { + if (!Value) + return MCConstantExpr::Create(0, Asm->OutContext); + return MCSymbolRefExpr::Create(Value, MCSymbolRefExpr::VK_COFF_IMGREL32, + Asm->OutContext); +} + +const MCExpr *Win64Exception::createImageRel32(const GlobalValue *GV) { + if (!GV) + return MCConstantExpr::Create(0, Asm->OutContext); + return createImageRel32(Asm->getSymbol(GV)); +} + +/// Emit the language-specific data that __C_specific_handler expects. This +/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning +/// up after faults with __try, __except, and __finally. The typeinfo values +/// are not really RTTI data, but pointers to filter functions that return an +/// integer (1, 0, or -1) indicating how to handle the exception. For __finally +/// blocks and other cleanups, the landing pad label is zero, and the filter +/// function is actually a cleanup handler with the same prototype. A catch-all +/// entry is modeled with a null filter function field and a non-zero landing +/// pad label. +/// +/// Possible filter function return values: +/// EXCEPTION_EXECUTE_HANDLER (1): +/// Jump to the landing pad label after cleanups. +/// EXCEPTION_CONTINUE_SEARCH (0): +/// Continue searching this table or continue unwinding. +/// EXCEPTION_CONTINUE_EXECUTION (-1): +/// Resume execution at the trapping PC. +/// +/// Inferred table structure: +/// struct Table { +/// int NumEntries; +/// struct Entry { +/// imagerel32 LabelStart; +/// imagerel32 LabelEnd; +/// imagerel32 FilterOrFinally; // One means catch-all. +/// imagerel32 LabelLPad; // Zero means __finally. +/// } Entries[NumEntries]; +/// }; +void Win64Exception::emitCSpecificHandlerTable() { + const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + + // Simplifying assumptions for first implementation: + // - Cleanups are not implemented. + // - Filters are not implemented. + + // The Itanium LSDA table sorts similar landing pads together to simplify the + // actions table, but we don't need that. + SmallVector<const LandingPadInfo *, 64> LandingPads; + LandingPads.reserve(PadInfos.size()); + for (const auto &LP : PadInfos) + LandingPads.push_back(&LP); + + // Compute label ranges for call sites as we would for the Itanium LSDA, but + // use an all zero action table because we aren't using these actions. 
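
Before the implementation continues below, it may help to picture the entry layout just described as a plain C++ mirror. This sketch is illustrative only: the field spellings come from the comment above, and the all-caps constants are the Windows SEH names it cites; none of this is copied from an LLVM or CRT header.

#include <cstdint>

// Hypothetical mirror of one table entry; every field is a 4-byte
// image-relative offset (imagerel32), exactly as emitted below.
struct SEHTableEntry {
  uint32_t LabelStart;      // start of the guarded call-site range
  uint32_t LabelEnd;        // one past the last return address in the range
  uint32_t FilterOrFinally; // the literal 1 means catch-all
  uint32_t LabelLPad;       // the literal 0 means __finally
};

// Filter return values, with the meanings listed in the comment above.
enum FilterResult {
  EXCEPTION_CONTINUE_EXECUTION = -1, // resume at the trapping PC
  EXCEPTION_CONTINUE_SEARCH = 0,     // keep searching / keep unwinding
  EXCEPTION_EXECUTE_HANDLER = 1,     // jump to the landing pad after cleanups
};
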
+ SmallVector<unsigned, 64> FirstActions; + FirstActions.resize(LandingPads.size()); + SmallVector<CallSiteEntry, 64> CallSites; + computeCallSiteTable(CallSites, LandingPads, FirstActions); + + MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin(); + MCSymbol *EHFuncEndSym = Asm->getFunctionEnd(); + + // Emit the number of table entries. + unsigned NumEntries = 0; + for (const CallSiteEntry &CSE : CallSites) { + if (!CSE.LPad) + continue; // Ignore gaps. + NumEntries += CSE.LPad->SEHHandlers.size(); + } + Asm->OutStreamer->EmitIntValue(NumEntries, 4); + + // If there are no actions, we don't need to iterate again. + if (NumEntries == 0) + return; + + // Emit the four-label records for each call site entry. The table has to be + // sorted in layout order, and the call sites should already be sorted. + for (const CallSiteEntry &CSE : CallSites) { + // Ignore gaps. Unlike the Itanium model, unwinding through a frame without + // an EH table entry will propagate the exception rather than terminating + // the program. + if (!CSE.LPad) + continue; + const LandingPadInfo *LPad = CSE.LPad; + + // Compute the label range. We may reuse the function begin and end labels + // rather than forming new ones. + const MCExpr *Begin = + createImageRel32(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym); + const MCExpr *End; + if (CSE.EndLabel) { + // The interval is half-open, so we have to add one to include the return + // address of the last invoke in the range. + End = MCBinaryExpr::CreateAdd(createImageRel32(CSE.EndLabel), + MCConstantExpr::Create(1, Asm->OutContext), + Asm->OutContext); + } else { + End = createImageRel32(EHFuncEndSym); + } + + // Emit an entry for each action. + for (SEHHandler Handler : LPad->SEHHandlers) { + Asm->OutStreamer->EmitValue(Begin, 4); + Asm->OutStreamer->EmitValue(End, 4); + + // Emit the filter or finally function pointer, if present. Otherwise, + // emit '1' to indicate a catch-all. + const Function *F = Handler.FilterOrFinally; + if (F) + Asm->OutStreamer->EmitValue(createImageRel32(Asm->getSymbol(F)), 4); + else + Asm->OutStreamer->EmitIntValue(1, 4); + + // Emit the recovery address, if present. Otherwise, this must be a + // finally. + const BlockAddress *BA = Handler.RecoverBA; + if (BA) + Asm->OutStreamer->EmitValue( + createImageRel32(Asm->GetBlockAddressSymbol(BA)), 4); + else + Asm->OutStreamer->EmitIntValue(0, 4); + } + } +} + +void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) { + const Function *F = MF->getFunction(); + const Function *ParentF = MMI->getWinEHParent(F); + auto &OS = *Asm->OutStreamer; + WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF); + + StringRef ParentLinkageName = + GlobalValue::getRealLinkageName(ParentF->getName()); + + MCSymbol *FuncInfoXData = + Asm->OutContext.getOrCreateSymbol(Twine("$cppxdata$", ParentLinkageName)); + OS.EmitValue(createImageRel32(FuncInfoXData), 4); + + // The Itanium LSDA table sorts similar landing pads together to simplify the + // actions table, but we don't need that. + SmallVector<const LandingPadInfo *, 64> LandingPads; + const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + LandingPads.reserve(PadInfos.size()); + for (const auto &LP : PadInfos) + LandingPads.push_back(&LP); + + RangeMapType PadMap; + computePadMap(LandingPads, PadMap); + + // The end label of the previous invoke or nounwind try-range. 
+ MCSymbol *LastLabel = Asm->getFunctionBegin(); + + // Whether there is a potentially throwing instruction (currently this means + // an ordinary call) between the end of the previous try-range and now. + bool SawPotentiallyThrowing = false; + + int LastEHState = -2; + + // The parent function and the catch handlers contribute to the 'ip2state' + // table. + + // Include ip2state entries for the beginning of the main function and + // for catch handler functions. + if (F == ParentF) { + FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); + LastEHState = -1; + } else if (FuncInfo.HandlerBaseState.count(F)) { + FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, + FuncInfo.HandlerBaseState[F])); + LastEHState = FuncInfo.HandlerBaseState[F]; + } + for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { + if (!MI.isEHLabel()) { + if (MI.isCall()) + SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI); + continue; + } + + // End of the previous try-range? + MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol(); + if (BeginLabel == LastLabel) + SawPotentiallyThrowing = false; + + // Beginning of a new try-range? + RangeMapType::const_iterator L = PadMap.find(BeginLabel); + if (L == PadMap.end()) + // Nope, it was just some random label. + continue; + + const PadRange &P = L->second; + const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; + assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && + "Inconsistent landing pad map!"); + + // FIXME: Should this be using FuncInfo.HandlerBaseState? + if (SawPotentiallyThrowing && LastEHState != -1) { + FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); + SawPotentiallyThrowing = false; + LastEHState = -1; + } + + if (LandingPad->WinEHState != LastEHState) + FuncInfo.IPToStateList.push_back( + std::make_pair(BeginLabel, LandingPad->WinEHState)); + LastEHState = LandingPad->WinEHState; + LastLabel = LandingPad->EndLabels[P.RangeIndex]; + } + } + + // Defer emission until we've visited the parent function and all the catch + // handlers. Cleanups don't contribute to the ip2state table yet, so don't + // count them. + if (ParentF != F && !FuncInfo.CatchHandlerMaxState.count(F)) + return; + ++FuncInfo.NumIPToStateFuncsVisited; + if (FuncInfo.NumIPToStateFuncsVisited != FuncInfo.CatchHandlerMaxState.size()) + return; + + MCSymbol *UnwindMapXData = nullptr; + MCSymbol *TryBlockMapXData = nullptr; + MCSymbol *IPToStateXData = nullptr; + if (!FuncInfo.UnwindMap.empty()) + UnwindMapXData = Asm->OutContext.getOrCreateSymbol( + Twine("$stateUnwindMap$", ParentLinkageName)); + if (!FuncInfo.TryBlockMap.empty()) + TryBlockMapXData = Asm->OutContext.getOrCreateSymbol( + Twine("$tryMap$", ParentLinkageName)); + if (!FuncInfo.IPToStateList.empty()) + IPToStateXData = Asm->OutContext.getOrCreateSymbol( + Twine("$ip2state$", ParentLinkageName)); + + // FuncInfo { + // uint32_t MagicNumber + // int32_t MaxState; + // UnwindMapEntry *UnwindMap; + // uint32_t NumTryBlocks; + // TryBlockMapEntry *TryBlockMap; + // uint32_t IPMapEntries; + // IPToStateMapEntry *IPToStateMap; + // uint32_t UnwindHelp; // (x64/ARM only) + // ESTypeList *ESTypeList; + // int32_t EHFlags; + // } + // EHFlags & 1 -> Synchronous exceptions only, no async exceptions. + // EHFlags & 2 -> ??? + // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue. 
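
One point the struct comment above leaves implicit: on x64 every pointer-typed field is emitted as a 4-byte image-relative offset (see the createImageRel32 calls that follow), so the record laid down by the emission code below can be pictured as this hypothetical on-disk struct. The names are invented for illustration; the field order matches the EmitIntValue/EmitValue sequence exactly.

#include <cstdint>

// Illustrative byte layout of the FuncInfo record emitted below.
struct FuncInfoRecord {
  uint32_t MagicNumber;  // 0x19930522
  int32_t MaxState;      // number of unwind map entries
  uint32_t UnwindMap;    // RVA of the UnwindMapEntry array, or 0
  uint32_t NumTryBlocks;
  uint32_t TryBlockMap;  // RVA of the TryBlockMapEntry array, or 0
  uint32_t IPMapEntries;
  uint32_t IPToStateMap; // RVA of the IPToStateMapEntry array, or 0
  uint32_t UnwindHelp;   // frame offset (x64/ARM only)
  uint32_t ESTypeList;   // emitted as 0 here
  int32_t EHFlags;       // 1 = synchronous exceptions only
};
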
+ OS.EmitLabel(FuncInfoXData); + OS.EmitIntValue(0x19930522, 4); // MagicNumber + OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState + OS.EmitValue(createImageRel32(UnwindMapXData), 4); // UnwindMap + OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); // NumTryBlocks + OS.EmitValue(createImageRel32(TryBlockMapXData), 4); // TryBlockMap + OS.EmitIntValue(FuncInfo.IPToStateList.size(), 4); // IPMapEntries + OS.EmitValue(createImageRel32(IPToStateXData), 4); // IPToStateMap + OS.EmitIntValue(FuncInfo.UnwindHelpFrameOffset, 4); // UnwindHelp + OS.EmitIntValue(0, 4); // ESTypeList + OS.EmitIntValue(1, 4); // EHFlags + + // UnwindMapEntry { + // int32_t ToState; + // void (*Action)(); + // }; + if (UnwindMapXData) { + OS.EmitLabel(UnwindMapXData); + for (const WinEHUnwindMapEntry &UME : FuncInfo.UnwindMap) { + OS.EmitIntValue(UME.ToState, 4); // ToState + OS.EmitValue(createImageRel32(UME.Cleanup), 4); // Action + } + } + + // TryBlockMap { + // int32_t TryLow; + // int32_t TryHigh; + // int32_t CatchHigh; + // int32_t NumCatches; + // HandlerType *HandlerArray; + // }; + if (TryBlockMapXData) { + OS.EmitLabel(TryBlockMapXData); + SmallVector<MCSymbol *, 1> HandlerMaps; + for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { + WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + MCSymbol *HandlerMapXData = nullptr; + + if (!TBME.HandlerArray.empty()) + HandlerMapXData = + Asm->OutContext.getOrCreateSymbol(Twine("$handlerMap$") + .concat(Twine(I)) + .concat("$") + .concat(ParentLinkageName)); + + HandlerMaps.push_back(HandlerMapXData); + + int CatchHigh = -1; + for (WinEHHandlerType &HT : TBME.HandlerArray) + CatchHigh = + std::max(CatchHigh, FuncInfo.CatchHandlerMaxState[HT.Handler]); + + assert(TBME.TryLow <= TBME.TryHigh); + OS.EmitIntValue(TBME.TryLow, 4); // TryLow + OS.EmitIntValue(TBME.TryHigh, 4); // TryHigh + OS.EmitIntValue(CatchHigh, 4); // CatchHigh + OS.EmitIntValue(TBME.HandlerArray.size(), 4); // NumCatches + OS.EmitValue(createImageRel32(HandlerMapXData), 4); // HandlerArray + } + + for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { + WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + MCSymbol *HandlerMapXData = HandlerMaps[I]; + if (!HandlerMapXData) + continue; + // HandlerType { + // int32_t Adjectives; + // TypeDescriptor *Type; + // int32_t CatchObjOffset; + // void (*Handler)(); + // int32_t ParentFrameOffset; // x64 only + // }; + OS.EmitLabel(HandlerMapXData); + for (const WinEHHandlerType &HT : TBME.HandlerArray) { + MCSymbol *ParentFrameOffset = + Asm->OutContext.getOrCreateParentFrameOffsetSymbol( + GlobalValue::getRealLinkageName(HT.Handler->getName())); + const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::Create( + ParentFrameOffset, MCSymbolRefExpr::VK_None, Asm->OutContext); + + // Get the frame escape label with the offset of the catch object. If + // the index is -1, then there is no catch object, and we should emit an + // offset of zero, indicating that no copy will occur. 
+ const MCExpr *FrameAllocOffsetRef = nullptr; + if (HT.CatchObjRecoverIdx >= 0) { + MCSymbol *FrameAllocOffset = + Asm->OutContext.getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(ParentF->getName()), + HT.CatchObjRecoverIdx); + FrameAllocOffsetRef = MCSymbolRefExpr::Create( + FrameAllocOffset, MCSymbolRefExpr::VK_None, Asm->OutContext); + } else { + FrameAllocOffsetRef = MCConstantExpr::Create(0, Asm->OutContext); + } + + OS.EmitIntValue(HT.Adjectives, 4); // Adjectives + OS.EmitValue(createImageRel32(HT.TypeDescriptor), 4); // Type + OS.EmitValue(FrameAllocOffsetRef, 4); // CatchObjOffset + OS.EmitValue(createImageRel32(HT.Handler), 4); // Handler + OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset + } + } + } + + // IPToStateMapEntry { + // void *IP; + // int32_t State; + // }; + if (IPToStateXData) { + OS.EmitLabel(IPToStateXData); + for (auto &IPStatePair : FuncInfo.IPToStateList) { + OS.EmitValue(createImageRel32(IPStatePair.first), 4); // IP + OS.EmitIntValue(IPStatePair.second, 4); // State + } } - Asm->OutStreamer.EmitWinCFIEndProc(); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.h b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.h index 538e132..5f4237f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.h @@ -17,7 +17,9 @@ #include "EHStreamer.h" namespace llvm { +class GlobalValue; class MachineFunction; +class MCExpr; class Win64Exception : public EHStreamer { /// Per-function flag to indicate if personality info should be emitted. @@ -29,12 +31,19 @@ class Win64Exception : public EHStreamer { /// Per-function flag to indicate if frame moves info should be emitted. bool shouldEmitMoves; + void emitCSpecificHandlerTable(); + + void emitCXXFrameHandler3Table(const MachineFunction *MF); + + const MCExpr *createImageRel32(const MCSymbol *Value); + const MCExpr *createImageRel32(const GlobalValue *GV); + public: //===--------------------------------------------------------------------===// // Main entry points. // Win64Exception(AsmPrinter *A); - virtual ~Win64Exception(); + ~Win64Exception() override; /// Emit all exception information that should come after the content. void endModule() override; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index b5e0929..371e20a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -20,14 +20,13 @@ namespace llvm { StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { assert(S); - DIDescriptor D(S); - assert((D.isCompileUnit() || D.isFile() || D.isSubprogram() || - D.isLexicalBlockFile() || D.isLexicalBlock()) && + assert((isa<DICompileUnit>(S) || isa<DIFile>(S) || isa<DISubprogram>(S) || + isa<DILexicalBlockBase>(S)) && "Unexpected scope info"); - DIScope Scope(S); - StringRef Dir = Scope.getDirectory(), - Filename = Scope.getFilename(); + auto *Scope = cast<DIScope>(S); + StringRef Dir = Scope->getDirectory(), + Filename = Scope->getFilename(); char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)]; if (Result) return Result; @@ -40,7 +39,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { if (Filename.find(':') == 1) Filepath = Filename; else - Filepath = (Dir + Twine("\\") + Filename).str(); + Filepath = (Dir + "\\" + Filename).str(); // Canonicalize the path. 
We have to do it textually because we may no longer
  // have access to the file in the filesystem.
@@ -81,7 +80,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {

 void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
                                                 const MachineFunction *MF) {
-  const MDNode *Scope = DL.getScope(MF->getFunction()->getContext());
+  const MDNode *Scope = DL.getScope();
   if (!Scope)
     return;
   StringRef Filename = getFullFilepath(Scope);
@@ -95,8 +94,8 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
   }

   FileNameRegistry.add(Filename);

-  MCSymbol *MCL = Asm->MMI->getContext().CreateTempSymbol();
-  Asm->OutStreamer.EmitLabel(MCL);
+  MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol();
+  Asm->OutStreamer->EmitLabel(MCL);
   CurFn->Instrs.push_back(MCL);
   InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine());
 }
@@ -121,7 +120,7 @@ void WinCodeViewLineTables::endModule() {
     return;

   assert(Asm != nullptr);
-  Asm->OutStreamer.SwitchSection(
+  Asm->OutStreamer->SwitchSection(
       Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
   Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);

@@ -136,7 +135,7 @@ void WinCodeViewLineTables::endModule() {
     emitDebugInfoForFunction(VisitedFunctions[I]);

   // This subsection holds a table mapping each file index to its offset in
   // the string table.
-  Asm->OutStreamer.AddComment("File index to string table offset subsection");
+  Asm->OutStreamer->AddComment("File index to string table offset subsection");
   Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION);
   size_t NumFilenames = FileNameRegistry.Infos.size();
   Asm->EmitInt32(8 * NumFilenames);
@@ -149,7 +148,7 @@ void WinCodeViewLineTables::endModule() {
   }

   // This subsection holds the string table.
-  Asm->OutStreamer.AddComment("String table");
+  Asm->OutStreamer->AddComment("String table");
   Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION);
   Asm->EmitInt32(FileNameRegistry.LastOffset);
   // The payload starts with a null character.
   Asm->EmitInt8(0);

   for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
     // Just emit unique filenames one by one, separated by a null character.
-    Asm->OutStreamer.EmitBytes(FileNameRegistry.Filenames[I]);
+    Asm->OutStreamer->EmitBytes(FileNameRegistry.Filenames[I]);
     Asm->EmitInt8(0);
   }

   // No more subsections. Fill with zeros to align the end of the section by 4.
-  Asm->OutStreamer.EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
+  Asm->OutStreamer->EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
   clear();
 }
@@ -190,8 +189,11 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
     return;
   assert(FI.End && "Don't know where the function ends?");

-  StringRef FuncName = getDISubprogram(GV).getDisplayName(),
-            GVName = GV->getName();
+  StringRef GVName = GV->getName();
+  StringRef FuncName;
+  if (auto *SP = getDISubprogram(GV))
+    FuncName = SP->getDisplayName();
+
   // FIXME Clang currently sets DisplayName to "bar" for a C++
   // "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying
   // to demangle display names anyway, so let's just put a mangled name into
@@ -199,41 +201,41 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
   if (GVName.startswith("\01?"))
     FuncName = GVName.substr(1);
   // Emit a symbol subsection, required by VS2012+ to find function boundaries.
-  MCSymbol *SymbolsBegin = Asm->MMI->getContext().CreateTempSymbol(),
-           *SymbolsEnd = Asm->MMI->getContext().CreateTempSymbol();
-  Asm->OutStreamer.AddComment("Symbol subsection for " + Twine(FuncName));
+  MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(),
+           *SymbolsEnd = Asm->MMI->getContext().createTempSymbol();
+  Asm->OutStreamer->AddComment("Symbol subsection for " + Twine(FuncName));
   Asm->EmitInt32(COFF::DEBUG_SYMBOL_SUBSECTION);
-  EmitLabelDiff(Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
-  Asm->OutStreamer.EmitLabel(SymbolsBegin);
+  EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
+  Asm->OutStreamer->EmitLabel(SymbolsBegin);
   {
-    MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().CreateTempSymbol(),
-             *ProcSegmentEnd = Asm->MMI->getContext().CreateTempSymbol();
-    EmitLabelDiff(Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2);
-    Asm->OutStreamer.EmitLabel(ProcSegmentBegin);
+    MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().createTempSymbol(),
+             *ProcSegmentEnd = Asm->MMI->getContext().createTempSymbol();
+    EmitLabelDiff(*Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2);
+    Asm->OutStreamer->EmitLabel(ProcSegmentBegin);

     Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_START);
     // Some bytes of this segment don't seem to be required for basic debugging,
     // so just fill them with zeroes.
-    Asm->OutStreamer.EmitFill(12, 0);
+    Asm->OutStreamer->EmitFill(12, 0);
     // This is the important bit that tells the debugger where the function
     // code is located and what its size is:
-    EmitLabelDiff(Asm->OutStreamer, Fn, FI.End);
-    Asm->OutStreamer.EmitFill(12, 0);
-    Asm->OutStreamer.EmitCOFFSecRel32(Fn);
-    Asm->OutStreamer.EmitCOFFSectionIndex(Fn);
+    EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
+    Asm->OutStreamer->EmitFill(12, 0);
+    Asm->OutStreamer->EmitCOFFSecRel32(Fn);
+    Asm->OutStreamer->EmitCOFFSectionIndex(Fn);
     Asm->EmitInt8(0);
     // Emit the function display name as a null-terminated string.
-    Asm->OutStreamer.EmitBytes(FuncName);
+    Asm->OutStreamer->EmitBytes(FuncName);
     Asm->EmitInt8(0);
-    Asm->OutStreamer.EmitLabel(ProcSegmentEnd);
+    Asm->OutStreamer->EmitLabel(ProcSegmentEnd);

     // We're done with this function.
     Asm->EmitInt16(0x0002);
     Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_END);
   }
-  Asm->OutStreamer.EmitLabel(SymbolsEnd);
+  Asm->OutStreamer->EmitLabel(SymbolsEnd);
   // Every subsection must be aligned to a 4-byte boundary.
-  Asm->OutStreamer.EmitFill((-FuncName.size()) % 4, 0);
+  Asm->OutStreamer->EmitFill((-FuncName.size()) % 4, 0);

   // PCs/Instructions are grouped into segments sharing the same filename.
   // Pre-calculate the lengths (in instructions) of these segments and store
@@ -252,21 +254,21 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
   FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd;

   // Emit a line table subsection, required to do PC-to-file:line lookup.
- Asm->OutStreamer.AddComment("Line table subsection for " + Twine(FuncName)); + Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName)); Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION); - MCSymbol *LineTableBegin = Asm->MMI->getContext().CreateTempSymbol(), - *LineTableEnd = Asm->MMI->getContext().CreateTempSymbol(); - EmitLabelDiff(Asm->OutStreamer, LineTableBegin, LineTableEnd); - Asm->OutStreamer.EmitLabel(LineTableBegin); + MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(), + *LineTableEnd = Asm->MMI->getContext().createTempSymbol(); + EmitLabelDiff(*Asm->OutStreamer, LineTableBegin, LineTableEnd); + Asm->OutStreamer->EmitLabel(LineTableBegin); // Identify the function this subsection is for. - Asm->OutStreamer.EmitCOFFSecRel32(Fn); - Asm->OutStreamer.EmitCOFFSectionIndex(Fn); + Asm->OutStreamer->EmitCOFFSecRel32(Fn); + Asm->OutStreamer->EmitCOFFSectionIndex(Fn); // Insert padding after a 16-bit section index. Asm->EmitInt16(0); // Length of the function's code, in bytes. - EmitLabelDiff(Asm->OutStreamer, Fn, FI.End); + EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End); // PC-to-linenumber lookup table: MCSymbol *FileSegmentEnd = nullptr; @@ -277,17 +279,17 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { if (FilenameSegmentLengths.count(J)) { // We came to a beginning of a new filename segment. if (FileSegmentEnd) - Asm->OutStreamer.EmitLabel(FileSegmentEnd); + Asm->OutStreamer->EmitLabel(FileSegmentEnd); StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename; assert(FileNameRegistry.Infos.count(CurFilename)); size_t IndexInStringTable = FileNameRegistry.Infos[CurFilename].FilenameID; // Each segment starts with the offset of the filename // in the string table. - Asm->OutStreamer.AddComment( + Asm->OutStreamer->AddComment( "Segment for file '" + Twine(CurFilename) + "' begins"); - MCSymbol *FileSegmentBegin = Asm->MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(FileSegmentBegin); + MCSymbol *FileSegmentBegin = Asm->MMI->getContext().createTempSymbol(); + Asm->OutStreamer->EmitLabel(FileSegmentBegin); Asm->EmitInt32(8 * IndexInStringTable); // Number of PC records in the lookup table. @@ -296,18 +298,18 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { // Full size of the segment for this filename, including the prev two // records. - FileSegmentEnd = Asm->MMI->getContext().CreateTempSymbol(); - EmitLabelDiff(Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd); + FileSegmentEnd = Asm->MMI->getContext().createTempSymbol(); + EmitLabelDiff(*Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd); } // The first PC with the given linenumber and the linenumber itself. 
- EmitLabelDiff(Asm->OutStreamer, Fn, Instr); + EmitLabelDiff(*Asm->OutStreamer, Fn, Instr); Asm->EmitInt32(InstrInfo[Instr].LineNumber); } if (FileSegmentEnd) - Asm->OutStreamer.EmitLabel(FileSegmentEnd); - Asm->OutStreamer.EmitLabel(LineTableEnd); + Asm->OutStreamer->EmitLabel(FileSegmentEnd); + Asm->OutStreamer->EmitLabel(LineTableEnd); } void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { @@ -327,7 +329,7 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { DebugLoc PrologEndLoc; bool EmptyPrologue = true; for (const auto &MBB : *MF) { - if (!PrologEndLoc.isUnknown()) + if (PrologEndLoc) break; for (const auto &MI : MBB) { if (MI.isDebugValue()) @@ -336,8 +338,7 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { // First known non-DBG_VALUE and non-frame setup location marks // the beginning of the function body. // FIXME: do we need the first subcondition? - if (!MI.getFlag(MachineInstr::FrameSetup) && - (!MI.getDebugLoc().isUnknown())) { + if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) { PrologEndLoc = MI.getDebugLoc(); break; } @@ -345,9 +346,8 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { } } // Record beginning of function if we have a non-empty prologue. - if (!PrologEndLoc.isUnknown() && !EmptyPrologue) { - DebugLoc FnStartDL = - PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext()); + if (PrologEndLoc && !EmptyPrologue) { + DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc(); maybeRecordLocation(FnStartDL, MF); } } @@ -364,10 +364,7 @@ void WinCodeViewLineTables::endFunction(const MachineFunction *MF) { FnDebugInfo.erase(GV); VisitedFunctions.pop_back(); } else { - // Define end label for subprogram. - MCSymbol *FunctionEndSym = Asm->OutStreamer.getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(FunctionEndSym); - CurFn->End = FunctionEndSym; + CurFn->End = Asm->getFunctionEnd(); } CurFn = nullptr; } @@ -377,7 +374,7 @@ void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) { if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup)) return; DebugLoc DL = MI->getDebugLoc(); - if (DL == PrevInstLoc || DL.isUnknown()) + if (DL == PrevInstLoc || !DL) return; maybeRecordLocation(DL, Asm->MF); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h index 8492eac..c66d141 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h @@ -114,7 +114,7 @@ class WinCodeViewLineTables : public AsmPrinterHandler { public: WinCodeViewLineTables(AsmPrinter *Asm); - ~WinCodeViewLineTables() { + ~WinCodeViewLineTables() override { for (DirAndFilenameToFilepathMapTy::iterator I = DirAndFilenameToFilepathMap.begin(), E = DirAndFilenameToFilepathMap.end(); diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp index 12f6bd7..fa17108 100644 --- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -31,10 +31,11 @@ using namespace llvm; namespace { class AtomicExpand: public FunctionPass { const TargetMachine *TM; + const TargetLowering *TLI; public: static char ID; // Pass identification, replacement for typeid explicit AtomicExpand(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM) { + : FunctionPass(ID), TM(TM), TLI(nullptr) { 
initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); } @@ -47,7 +48,7 @@ namespace { bool expandAtomicLoadToLL(LoadInst *LI); bool expandAtomicLoadToCmpXchg(LoadInst *LI); bool expandAtomicStore(StoreInst *SI); - bool expandAtomicRMW(AtomicRMWInst *AI); + bool tryExpandAtomicRMW(AtomicRMWInst *AI); bool expandAtomicRMWToLLSC(AtomicRMWInst *AI); bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI); bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); @@ -67,9 +68,9 @@ FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) { } bool AtomicExpand::runOnFunction(Function &F) { - if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand()) + if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand()) return false; - auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering(); + TLI = TM->getSubtargetImpl(F)->getTargetLowering(); SmallVector<Instruction *, 1> AtomicInsts; @@ -91,7 +92,7 @@ bool AtomicExpand::runOnFunction(Function &F) { auto FenceOrdering = Monotonic; bool IsStore, IsLoad; - if (TargetLowering->getInsertFencesForAtomic()) { + if (TLI->getInsertFencesForAtomic()) { if (LI && isAtLeastAcquire(LI->getOrdering())) { FenceOrdering = LI->getOrdering(); LI->setOrdering(Monotonic); @@ -107,9 +108,9 @@ bool AtomicExpand::runOnFunction(Function &F) { FenceOrdering = RMWI->getOrdering(); RMWI->setOrdering(Monotonic); IsStore = IsLoad = true; - } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() && - (isAtLeastRelease(CASI->getSuccessOrdering()) || - isAtLeastAcquire(CASI->getSuccessOrdering()))) { + } else if (CASI && !TLI->hasLoadLinkedStoreConditional() && + (isAtLeastRelease(CASI->getSuccessOrdering()) || + isAtLeastAcquire(CASI->getSuccessOrdering()))) { // If a compare and swap is lowered to LL/SC, we can do smarter fence // insertion, with a stronger one on the success path than on the // failure path. As a result, fence insertion is directly done by @@ -125,20 +126,22 @@ bool AtomicExpand::runOnFunction(Function &F) { } } - if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) { + if (LI && TLI->shouldExpandAtomicLoadInIR(LI)) { MadeChange |= expandAtomicLoad(LI); - } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) { + } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) { MadeChange |= expandAtomicStore(SI); } else if (RMWI) { // There are two different ways of expanding RMW instructions: // - into a load if it is idempotent // - into a Cmpxchg/LL-SC loop otherwise // we try them in that order. 
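
For context, an atomicrmw is idempotent when the operation provably leaves the stored value unchanged, e.g. x + 0, x | 0, x ^ 0, or x & ~0, so it can be lowered to a plain atomic load. One plausible shape for the isIdempotentRMW predicate declared earlier, sketched here for illustration (not necessarily the exact body in this revision):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: true when applying the RMW operation cannot change memory.
static bool isIdempotentRMWSketch(AtomicRMWInst *RMWI) {
  auto *C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;
  switch (RMWI->getOperation()) {
  case AtomicRMWInst::Add: // x + 0
  case AtomicRMWInst::Sub: // x - 0
  case AtomicRMWInst::Or:  // x | 0
  case AtomicRMWInst::Xor: // x ^ 0
    return C->isZero();
  case AtomicRMWInst::And: // x & ~0
    return C->isMinusOne();
  default:
    return false;
  }
}

The hunk that follows restructures how this test combines with the expansion path:
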
- MadeChange |= (isIdempotentRMW(RMWI) && - simplifyIdempotentRMW(RMWI)) || - (TargetLowering->shouldExpandAtomicRMWInIR(RMWI) && - expandAtomicRMW(RMWI)); - } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) { + + if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { + MadeChange = true; + } else { + MadeChange |= tryExpandAtomicRMW(RMWI); + } + } else if (CASI && TLI->hasLoadLinkedStoreConditional()) { MadeChange |= expandAtomicCmpXchg(CASI); } } @@ -149,13 +152,9 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order, bool IsStore, bool IsLoad) { IRBuilder<> Builder(I); - auto LeadingFence = - TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence( - Builder, Order, IsStore, IsLoad); + auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad); - auto TrailingFence = - TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence( - Builder, Order, IsStore, IsLoad); + auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad); // The trailing fence is emitted before the instruction instead of after // because there is no easy way of setting Builder insertion point after // an instruction. So we must erase it from the BB, and insert it back @@ -171,16 +170,13 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order, } bool AtomicExpand::expandAtomicLoad(LoadInst *LI) { - if (TM->getSubtargetImpl() - ->getTargetLowering() - ->hasLoadLinkedStoreConditional()) + if (TLI->hasLoadLinkedStoreConditional()) return expandAtomicLoadToLL(LI); else return expandAtomicLoadToCmpXchg(LI); } bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { - auto TLI = TM->getSubtargetImpl()->getTargetLowering(); IRBuilder<> Builder(LI); // On some architectures, load-linked instructions are atomic for larger @@ -218,7 +214,7 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { // atomic if implemented as a native store. So we replace them by an // atomic swap, that can be implemented for example as a ldrex/strex on ARM // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. - // It is the responsibility of the target to only return true in + // It is the responsibility of the target to only signal expansion via // shouldExpandAtomicRMW in cases where this is required and possible. IRBuilder<> Builder(SI); AtomicRMWInst *AI = @@ -227,16 +223,26 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { SI->eraseFromParent(); // Now we have an appropriate swap instruction, lower it as usual. 
- return expandAtomicRMW(AI); + return tryExpandAtomicRMW(AI); } -bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) { - if (TM->getSubtargetImpl() - ->getTargetLowering() - ->hasLoadLinkedStoreConditional()) +bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { + switch (TLI->shouldExpandAtomicRMWInIR(AI)) { + case TargetLoweringBase::AtomicRMWExpansionKind::None: + return false; + case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: { + assert(TLI->hasLoadLinkedStoreConditional() && + "TargetLowering requested we expand AtomicRMW instruction into " + "load-linked/store-conditional combos, but such instructions aren't " + "supported"); + return expandAtomicRMWToLLSC(AI); - else + } + case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: { return expandAtomicRMWToCmpXchg(AI); + } + } + llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); } /// Emit IR to implement the given atomicrmw operation on values in registers, @@ -277,7 +283,6 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, } bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { - auto TLI = TM->getSubtargetImpl()->getTargetLowering(); AtomicOrdering MemOpOrder = AI->getOrdering(); Value *Addr = AI->getPointerOperand(); BasicBlock *BB = AI->getParent(); @@ -397,7 +402,6 @@ bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) { } bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { - auto TLI = TM->getSubtargetImpl()->getTargetLowering(); AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); AtomicOrdering FailureOrder = CI->getFailureOrdering(); Value *Addr = CI->getPointerOperand(); @@ -551,13 +555,10 @@ bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { } bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { - auto TLI = TM->getSubtargetImpl()->getTargetLowering(); - if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad)) expandAtomicLoad(ResultingLoad); return true; } - return false; } diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp index 72da806..82f5c48 100644 --- a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -15,633 +15,23 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfoImpl.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <utility> using namespace llvm; -static cl::opt<unsigned> -PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0), - cl::desc("Threshold for partial unrolling"), cl::Hidden); - #define DEBUG_TYPE "basictti" -namespace { - -class BasicTTI final : public ImmutablePass, public TargetTransformInfo { - const TargetMachine *TM; - - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; - - /// Estimate the cost overhead of SK_Alternate shuffle. 
- unsigned getAltShuffleOverhead(Type *Ty) const; - - const TargetLoweringBase *getTLI() const { - return TM->getSubtargetImpl()->getTargetLowering(); - } - -public: - BasicTTI() : ImmutablePass(ID), TM(nullptr) { - llvm_unreachable("This pass cannot be directly constructed"); - } - - BasicTTI(const TargetMachine *TM) : ImmutablePass(ID), TM(TM) { - initializeBasicTTIPass(*PassRegistry::getPassRegistry()); - } - - void initializePass() override { - pushTTIStack(this); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - TargetTransformInfo::getAnalysisUsage(AU); - } - - /// Pass identification. - static char ID; - - /// Provide necessary pointer adjustments for the two base classes. - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &TargetTransformInfo::ID) - return (TargetTransformInfo*)this; - return this; - } - - bool hasBranchDivergence() const override; - - /// \name Scalar TTI Implementations - /// @{ - - bool isLegalAddImmediate(int64_t imm) const override; - bool isLegalICmpImmediate(int64_t imm) const override; - bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const override; - int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const override; - bool isTruncateFree(Type *Ty1, Type *Ty2) const override; - bool isTypeLegal(Type *Ty) const override; - unsigned getJumpBufAlignment() const override; - unsigned getJumpBufSize() const override; - bool shouldBuildLookupTables() const override; - bool haveFastSqrt(Type *Ty) const override; - void getUnrollingPreferences(const Function *F, Loop *L, - UnrollingPreferences &UP) const override; - - /// @} - - /// \name Vector TTI Implementations - /// @{ - - unsigned getNumberOfRegisters(bool Vector) const override; - unsigned getMaxInterleaveFactor() const override; - unsigned getRegisterBitWidth(bool Vector) const override; - unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, - OperandValueKind, OperandValueProperties, - OperandValueProperties) const override; - unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index, Type *SubTp) const override; - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const override; - unsigned getCFInstrCost(unsigned Opcode) const override; - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const override; - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const override; - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const override; - unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, - ArrayRef<Type*> Tys) const override; - unsigned getNumberOfParts(Type *Tp) const override; - unsigned getAddressComputationCost( Type *Ty, bool IsComplex) const override; - unsigned getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwise) const override; - - /// @} -}; - -} - -INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti", - "Target independent code generator's TTI", true, true, false) -char BasicTTI::ID = 0; - -ImmutablePass * -llvm::createBasicTargetTransformInfoPass(const TargetMachine *TM) { - return new BasicTTI(TM); -} - -bool BasicTTI::hasBranchDivergence() const { return false; } - -bool BasicTTI::isLegalAddImmediate(int64_t imm) const { - return getTLI()->isLegalAddImmediate(imm); -} - -bool BasicTTI::isLegalICmpImmediate(int64_t imm) const { - return 
getTLI()->isLegalICmpImmediate(imm); -} - -bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const { - TargetLoweringBase::AddrMode AM; - AM.BaseGV = BaseGV; - AM.BaseOffs = BaseOffset; - AM.HasBaseReg = HasBaseReg; - AM.Scale = Scale; - return getTLI()->isLegalAddressingMode(AM, Ty); -} - -int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const { - TargetLoweringBase::AddrMode AM; - AM.BaseGV = BaseGV; - AM.BaseOffs = BaseOffset; - AM.HasBaseReg = HasBaseReg; - AM.Scale = Scale; - return getTLI()->getScalingFactorCost(AM, Ty); -} - -bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const { - return getTLI()->isTruncateFree(Ty1, Ty2); -} - -bool BasicTTI::isTypeLegal(Type *Ty) const { - EVT T = getTLI()->getValueType(Ty); - return getTLI()->isTypeLegal(T); -} - -unsigned BasicTTI::getJumpBufAlignment() const { - return getTLI()->getJumpBufAlignment(); -} - -unsigned BasicTTI::getJumpBufSize() const { - return getTLI()->getJumpBufSize(); -} - -bool BasicTTI::shouldBuildLookupTables() const { - const TargetLoweringBase *TLI = getTLI(); - return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); -} - -bool BasicTTI::haveFastSqrt(Type *Ty) const { - const TargetLoweringBase *TLI = getTLI(); - EVT VT = TLI->getValueType(Ty); - return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); -} - -void BasicTTI::getUnrollingPreferences(const Function *F, Loop *L, - UnrollingPreferences &UP) const { - // This unrolling functionality is target independent, but to provide some - // motivation for its intended use, for x86: - - // According to the Intel 64 and IA-32 Architectures Optimization Reference - // Manual, Intel Core models and later have a loop stream detector - // (and associated uop queue) that can benefit from partial unrolling. - // The relevant requirements are: - // - The loop must have no more than 4 (8 for Nehalem and later) branches - // taken, and none of them may be calls. - // - The loop can have no more than 18 (28 for Nehalem and later) uops. - - // According to the Software Optimization Guide for AMD Family 15h Processors, - // models 30h-4fh (Steamroller and later) have a loop predictor and loop - // buffer which can benefit from partial unrolling. - // The relevant requirements are: - // - The loop must have fewer than 16 branches - // - The loop must have less than 40 uops in all executed loop branches - - // The number of taken branches in a loop is hard to estimate here, and - // benchmarking has revealed that it is better not to be conservative when - // estimating the branch count. As a result, we'll ignore the branch limits - // until someone finds a case where it matters in practice. - - unsigned MaxOps; - const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>(F); - if (PartialUnrollingThreshold.getNumOccurrences() > 0) - MaxOps = PartialUnrollingThreshold; - else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) - MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; - else - return; - - // Scan the loop: don't unroll loops with calls. 
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) { - BasicBlock *BB = *I; - - for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) - if (isa<CallInst>(J) || isa<InvokeInst>(J)) { - ImmutableCallSite CS(J); - if (const Function *F = CS.getCalledFunction()) { - if (!TopTTI->isLoweredToCall(F)) - continue; - } - - return; - } - } - - // Enable runtime and partial unrolling up to the specified size. - UP.Partial = UP.Runtime = true; - UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps; -} - -//===----------------------------------------------------------------------===// -// -// Calls used by the vectorizers. -// -//===----------------------------------------------------------------------===// - -unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert, - bool Extract) const { - assert (Ty->isVectorTy() && "Can only scalarize vectors"); - unsigned Cost = 0; - - for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { - if (Insert) - Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); - if (Extract) - Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); - } - - return Cost; -} - -unsigned BasicTTI::getNumberOfRegisters(bool Vector) const { - return 1; -} - -unsigned BasicTTI::getRegisterBitWidth(bool Vector) const { - return 32; -} - -unsigned BasicTTI::getMaxInterleaveFactor() const { - return 1; -} - -unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind, OperandValueKind, - OperandValueProperties, - OperandValueProperties) const { - // Check if any of the operands are vector operands. - const TargetLoweringBase *TLI = getTLI(); - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); - - bool IsFloat = Ty->getScalarType()->isFloatingPointTy(); - // Assume that floating point arithmetic operations cost twice as much as - // integer operations. - unsigned OpCost = (IsFloat ? 2 : 1); - - if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { - // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that there is some - // overhead to this. - // TODO: Once we have extract/insert subvector cost we need to use them. - if (LT.first > 1) - return LT.first * 2 * OpCost; - return LT.first * 1 * OpCost; - } - - if (!TLI->isOperationExpand(ISD, LT.second)) { - // If the operation is custom lowered then assume - // thare the code is twice as expensive. - return LT.first * 2 * OpCost; - } - - // Else, assume that we need to scalarize this op. - if (Ty->isVectorTy()) { - unsigned Num = Ty->getVectorNumElements(); - unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType()); - // return the cost of multiple scalar invocation plus the cost of inserting - // and extracting the values. - return getScalarizationOverhead(Ty, true, true) + Num * Cost; - } - - // We don't know anything about this scalar instruction. - return OpCost; -} - -unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const { - assert(Ty->isVectorTy() && "Can only shuffle vectors"); - unsigned Cost = 0; - // Shuffle cost is equal to the cost of extracting element from its argument - // plus the cost of inserting them onto the result vector. - - // e.g. 
<4 x float> has a mask of <0,5,2,7> i.e we need to extract from index - // 0 of first vector, index 1 of second vector,index 2 of first vector and - // finally index 3 of second vector and insert them at index <0,1,2,3> of - // result vector. - for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { - Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); - Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); - } - return Cost; -} - -unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp) const { - if (Kind == SK_Alternate) { - return getAltShuffleOverhead(Tp); - } - return 1; -} - -unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { - const TargetLoweringBase *TLI = getTLI(); - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src); - std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst); - - // Check for NOOP conversions. - if (SrcLT.first == DstLT.first && - SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { - - // Bitcast between types that are legalized to the same type are free. - if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) - return 0; - } - - if (Opcode == Instruction::Trunc && - TLI->isTruncateFree(SrcLT.second, DstLT.second)) - return 0; - - if (Opcode == Instruction::ZExt && - TLI->isZExtFree(SrcLT.second, DstLT.second)) - return 0; - - // If the cast is marked as legal (or promote) then assume low cost. - if (SrcLT.first == DstLT.first && - TLI->isOperationLegalOrPromote(ISD, DstLT.second)) - return 1; - - // Handle scalar conversions. - if (!Src->isVectorTy() && !Dst->isVectorTy()) { - - // Scalar bitcasts are usually free. - if (Opcode == Instruction::BitCast) - return 0; - - // Just check the op cost. If the operation is legal then assume it costs 1. - if (!TLI->isOperationExpand(ISD, DstLT.second)) - return 1; - - // Assume that illegal scalar instruction are expensive. - return 4; - } - - // Check vector-to-vector casts. - if (Dst->isVectorTy() && Src->isVectorTy()) { - - // If the cast is between same-sized registers, then the check is simple. - if (SrcLT.first == DstLT.first && - SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { - - // Assume that Zext is done using AND. - if (Opcode == Instruction::ZExt) - return 1; - - // Assume that sext is done using SHL and SRA. - if (Opcode == Instruction::SExt) - return 2; - - // Just check the op cost. If the operation is legal then assume it costs - // 1 and multiply by the type-legalization overhead. - if (!TLI->isOperationExpand(ISD, DstLT.second)) - return SrcLT.first * 1; - } - - // If we are converting vectors and the operation is illegal, or - // if the vectors are legalized to different types, estimate the - // scalarization costs. - unsigned Num = Dst->getVectorNumElements(); - unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(), - Src->getScalarType()); - - // Return the cost of multiple scalar invocation plus the cost of - // inserting and extracting the values. - return getScalarizationOverhead(Dst, true, true) + Num * Cost; - } - - // We already handled vector-to-vector and scalar-to-scalar conversions. This - // is where we handle bitcast between vectors and scalars. We need to assume - // that the conversion is scalarized in one way or another. 
- if (Opcode == Instruction::BitCast) - // Illegal bitcasts are done by storing and loading from a stack slot. - return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) + - (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0); - - llvm_unreachable("Unhandled cast"); - } - -unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const { - // Branches are assumed to be predicted. - return 0; -} - -unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const { - const TargetLoweringBase *TLI = getTLI(); - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - // Selects on vectors are actually vector selects. - if (ISD == ISD::SELECT) { - assert(CondTy && "CondTy must exist"); - if (CondTy->isVectorTy()) - ISD = ISD::VSELECT; - } - - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); - - if (!(ValTy->isVectorTy() && !LT.second.isVector()) && - !TLI->isOperationExpand(ISD, LT.second)) { - // The operation is legal. Assume it costs 1. Multiply - // by the type-legalization overhead. - return LT.first * 1; - } - - // Otherwise, assume that the cast is scalarized. - if (ValTy->isVectorTy()) { - unsigned Num = ValTy->getVectorNumElements(); - if (CondTy) - CondTy = CondTy->getScalarType(); - unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(), - CondTy); - - // Return the cost of multiple scalar invocation plus the cost of inserting - // and extracting the values. - return getScalarizationOverhead(ValTy, true, false) + Num * Cost; - } - - // Unknown scalar opcode. - return 1; -} - -unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { - std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Val->getScalarType()); - - return LT.first; -} - -unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - assert(!Src->isVoidTy() && "Invalid type"); - std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src); - - // Assuming that all loads of legal types cost 1. - unsigned Cost = LT.first; - - if (Src->isVectorTy() && - Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) { - // This is a vector load that legalizes to a larger type than the vector - // itself. Unless the corresponding extending load or truncating store is - // legal, then this will scalarize. - TargetLowering::LegalizeAction LA = TargetLowering::Expand; - EVT MemVT = getTLI()->getValueType(Src, true); - if (MemVT.isSimple() && MemVT != MVT::Other) { - if (Opcode == Instruction::Store) - LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT()); - else - LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); - } - - if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { - // This is a vector load/store for some illegal type that is scalarized. - // We must account for the cost of building or decomposing the vector. - Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, - Opcode == Instruction::Store); - } - } - - return Cost; -} - -unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<Type *> Tys) const { - unsigned ISD = 0; - switch (IID) { - default: { - // Assume that we need to scalarize this intrinsic. 
- unsigned ScalarizationCost = 0; - unsigned ScalarCalls = 1; - if (RetTy->isVectorTy()) { - ScalarizationCost = getScalarizationOverhead(RetTy, true, false); - ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); - } - for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { - if (Tys[i]->isVectorTy()) { - ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); - ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); - } - } - - return ScalarCalls + ScalarizationCost; - } - // Look for intrinsics that can be lowered directly or turned into a scalar - // intrinsic call. - case Intrinsic::sqrt: ISD = ISD::FSQRT; break; - case Intrinsic::sin: ISD = ISD::FSIN; break; - case Intrinsic::cos: ISD = ISD::FCOS; break; - case Intrinsic::exp: ISD = ISD::FEXP; break; - case Intrinsic::exp2: ISD = ISD::FEXP2; break; - case Intrinsic::log: ISD = ISD::FLOG; break; - case Intrinsic::log10: ISD = ISD::FLOG10; break; - case Intrinsic::log2: ISD = ISD::FLOG2; break; - case Intrinsic::fabs: ISD = ISD::FABS; break; - case Intrinsic::minnum: ISD = ISD::FMINNUM; break; - case Intrinsic::maxnum: ISD = ISD::FMAXNUM; break; - case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break; - case Intrinsic::floor: ISD = ISD::FFLOOR; break; - case Intrinsic::ceil: ISD = ISD::FCEIL; break; - case Intrinsic::trunc: ISD = ISD::FTRUNC; break; - case Intrinsic::nearbyint: - ISD = ISD::FNEARBYINT; break; - case Intrinsic::rint: ISD = ISD::FRINT; break; - case Intrinsic::round: ISD = ISD::FROUND; break; - case Intrinsic::pow: ISD = ISD::FPOW; break; - case Intrinsic::fma: ISD = ISD::FMA; break; - case Intrinsic::fmuladd: ISD = ISD::FMA; break; - // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - return 0; - } - - const TargetLoweringBase *TLI = getTLI(); - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy); - - if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { - // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that there is some - // overhead to this. - // TODO: Once we have extract/insert subvector cost we need to use them. - if (LT.first > 1) - return LT.first * 2; - return LT.first * 1; - } - - if (!TLI->isOperationExpand(ISD, LT.second)) { - // If the operation is custom lowered then assume - // thare the code is twice as expensive. - return LT.first * 2; - } - - // If we can't lower fmuladd into an FMA estimate the cost as a floating - // point mul followed by an add. - if (IID == Intrinsic::fmuladd) - return TopTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + - TopTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); - - // Else, assume that we need to scalarize this intrinsic. For math builtins - // this will emit a costly libcall, adding call overhead and spills. Make it - // very expensive. - if (RetTy->isVectorTy()) { - unsigned Num = RetTy->getVectorNumElements(); - unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(), - Tys); - return 10 * Cost * Num; - } - - // This is going to be turned into a library call, make it expensive. - return 10; -} - -unsigned BasicTTI::getNumberOfParts(Type *Tp) const { - std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp); - return LT.first; -} - -unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { - return 0; -} +// This flag is used by the template base class for BasicTTIImpl, and here to +// provide a definition. 
+cl::opt<unsigned> + llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0), + cl::desc("Threshold for partial unrolling"), + cl::Hidden); -unsigned BasicTTI::getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwise) const { - assert(Ty->isVectorTy() && "Expect a vector type"); - unsigned NumVecElts = Ty->getVectorNumElements(); - unsigned NumReduxLevels = Log2_32(NumVecElts); - unsigned ArithCost = NumReduxLevels * - TopTTI->getArithmeticInstrCost(Opcode, Ty); - // Assume the pairwise shuffles add a cost. - unsigned ShuffleCost = - NumReduxLevels * (IsPairwise + 1) * - TopTTI->getShuffleCost(SK_ExtractSubvector, Ty, NumVecElts / 2, Ty); - return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); -} +BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, Function &F) + : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index 2128da1..b8d9a1a 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -601,8 +602,7 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // instructions that would be deleted in the merge. MachineFunction *MF = MBB1->getParent(); if (EffectiveTailLen >= 2 && - MF->getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && + MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize) && (I1 == MBB1->begin() || I2 == MBB2->begin())) return true; @@ -728,6 +728,62 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, return true; } +static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) { + auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end(); + auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end(); + if ((E1 - I1) != (E2 - I2)) + return false; + for (; I1 != E1; ++I1, ++I2) { + if (**I1 != **I2) + return false; + } + return true; +} + +static void +removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, + MachineBasicBlock &MBBCommon) { + // Remove MMOs from memory operations in the common block + // when they do not match the ones from the block being tail-merged. + // This ensures later passes conservatively compute dependencies. + MachineBasicBlock *MBB = MBBIStartPos->getParent(); + // Note CommonTailLen does not necessarily match the size of + // the common BB nor all its instructions because of debug + // instruction differences.
+ unsigned CommonTailLen = 0; + for (auto E = MBB->end(); MBBIStartPos != E; ++MBBIStartPos) + ++CommonTailLen; + + MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin(); + MachineBasicBlock::reverse_iterator MBBIE = MBB->rend(); + MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin(); + MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend(); + + while (CommonTailLen--) { + assert(MBBI != MBBIE && "Reached BB end within common tail length!"); + (void)MBBIE; + + if (MBBI->isDebugValue()) { + ++MBBI; + continue; + } + + while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue()) + ++MBBICommon; + + assert(MBBICommon != MBBIECommon && + "Reached BB end within common tail length!"); + assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!"); + + if (MBBICommon->mayLoad() || MBBICommon->mayStore()) + if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon)) + MBBICommon->clearMemRefs(); + + ++MBBI; + ++MBBICommon; + } +} + // See if any of the blocks in MergePotentials (which all have a common single // successor, or all have no successor) can be tail-merged. If there is a // successor, any blocks in MergePotentials that are not tail-merged and @@ -762,7 +818,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // Sort by hash value so that blocks with identical end sequences sort // together. - std::stable_sort(MergePotentials.begin(), MergePotentials.end()); + array_pod_sort(MergePotentials.begin(), MergePotentials.end()); // Walk through equivalence sets looking for actual exact matches. while (MergePotentials.size() > 1) { @@ -841,6 +897,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, continue; DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() << (i == e-1 ? "" : ", ")); + // Remove MMOs from memory operations as needed. + removeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB); // Hack the end off BB i, making it jump to BB commonTailIndex instead. ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. @@ -1145,6 +1203,11 @@ ReoptimizeBlock: if (FallThrough == MF.end()) { // TODO: Simplify preds to not branch here if possible! + } else if (FallThrough->isLandingPad()) { + // Don't rewrite to a landing pad fallthrough. That could lead to the case + // where a BB jumps to more than one landing pad. + // TODO: Is it ever worth rewriting predecessors which don't already + // jump to a landing pad, and so can safely jump to the fallthrough? } else { // Rewrite all predecessors of the old block to go to the fallthrough // instead. @@ -1620,8 +1683,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // Also avoid moving code above a predicated instruction since it's hard to // reason about register liveness with predicated instructions. bool DontMoveAcrossStore = true; - if (!PI->isSafeToMove(TII, nullptr, DontMoveAcrossStore) || - TII->isPredicated(PI)) + if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(PI)) return MBB->end(); @@ -1759,7 +1821,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { break; bool DontMoveAcrossStore = true; - if (!TIB->isSafeToMove(TII, nullptr, DontMoveAcrossStore)) + if (!TIB->isSafeToMove(nullptr, DontMoveAcrossStore)) break; // Remove kills from LocalDefsSet; these registers had short live ranges.
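A standalone model of the rule the two helpers above implement may help here: when tail merging keeps one copy of a memory instruction, its memory operands (MMOs) survive only if every merged copy carried an identical list; otherwise the list is cleared, and an empty list tells later passes to assume the access can alias anything. This is a minimal sketch in plain C++, not the LLVM API: MemOperand, MemInstr, and mergeInto are invented stand-ins for MachineMemOperand, MachineInstr, and the clearMemRefs call.

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Invented stand-in for llvm::MachineMemOperand: just an address and a size.
    struct MemOperand {
      uint64_t Address;
      uint64_t Size;
      bool operator==(const MemOperand &O) const {
        return Address == O.Address && Size == O.Size;
      }
    };

    // Invented stand-in for a memory-touching MachineInstr. An empty MMO list
    // means "may access anything" to downstream consumers.
    struct MemInstr {
      std::vector<MemOperand> MMOs;
    };

    // Same shape as hasIdenticalMMOs above: equal length, pairwise-equal entries.
    static bool hasIdenticalMMOs(const MemInstr &A, const MemInstr &B) {
      return A.MMOs == B.MMOs;
    }

    // The conservative merge rule: keep the MMOs only when both copies agree.
    static void mergeInto(MemInstr &Common, const MemInstr &Other) {
      if (!hasIdenticalMMOs(Common, Other))
        Common.MMOs.clear();
    }

    int main() {
      MemInstr Common{{{0x1000, 4}}};
      MemInstr Other{{{0x2000, 4}}};
      mergeInto(Common, Other);
      std::cout << "MMOs kept: " << Common.MMOs.size() << "\n"; // prints 0
      return 0;
    }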
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index 307dec5..2c6eaf3 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -21,13 +21,13 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); - initializeBasicTTIPass(Registry); initializeBranchFolderPassPass(Registry); initializeCodeGenPreparePass(Registry); initializeDeadMachineInstructionElimPass(Registry); + initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); - initializeExpandPostRAPass(Registry); initializeExpandISelPseudosPass(Registry); + initializeExpandPostRAPass(Registry); initializeFinalizeMachineBundlesPass(Registry); initializeGCMachineCodeAnalysisPass(Registry); initializeGCModuleInfoPass(Registry); @@ -37,31 +37,35 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveStacksPass(Registry); initializeLiveVariablesPass(Registry); initializeLocalStackSlotPassPass(Registry); + initializeLowerIntrinsicsPass(Registry); initializeMachineBlockFrequencyInfoPass(Registry); initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); - initializeMachineCopyPropagationPass(Registry); - initializeMachineCombinerPass(Registry); initializeMachineCSEPass(Registry); + initializeMachineCombinerPass(Registry); + initializeMachineCopyPropagationPass(Registry); initializeMachineDominatorTreePass(Registry); - initializeMachinePostDominatorTreePass(Registry); + initializeMachineFunctionPrinterPassPass(Registry); initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); initializeMachineModuleInfoPass(Registry); + initializeMachinePostDominatorTreePass(Registry); initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); initializeMachineVerifierPassPass(Registry); initializeOptimizePHIsPass(Registry); + initializePEIPass(Registry); initializePHIEliminationPass(Registry); initializePeepholeOptimizerPass(Registry); initializePostMachineSchedulerPass(Registry); initializePostRASchedulerPass(Registry); initializeProcessImplicitDefsPass(Registry); - initializePEIPass(Registry); initializeRegisterCoalescerPass(Registry); + initializeShrinkWrapPass(Registry); initializeSlotIndexesPass(Registry); - initializeStackProtectorPass(Registry); initializeStackColoringPass(Registry); + initializeStackMapLivenessPass(Registry); + initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); initializeTailDuplicatePassPass(Registry); initializeTargetPassConfigPass(Registry); @@ -71,9 +75,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeUnreachableMachineBlockElimPass(Registry); initializeVirtRegMapPass(Registry); initializeVirtRegRewriterPass(Registry); - initializeLowerIntrinsicsPass(Registry); - initializeMachineFunctionPrinterPassPass(Registry); - initializeStackMapLivenessPass(Registry); + initializeWinEHPreparePass(Registry); } void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index 82cd380..2c1858b 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" +#include 
"llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" @@ -32,13 +33,13 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Statepoint.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -72,6 +73,10 @@ static cl::opt<bool> DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare")); +static cl::opt<bool> + DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), + cl::desc("Disable GC optimizations in CodeGenPrepare")); + static cl::opt<bool> DisableSelectToBranch( "disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion.")); @@ -119,7 +124,6 @@ class TypePromotionTransaction; const TargetLowering *TLI; const TargetTransformInfo *TTI; const TargetLibraryInfo *TLInfo; - DominatorTree *DT; /// CurInstIterator - As we scan instructions optimizing them, this is the /// next instruction to optimize. Xforms that can invalidate this should @@ -137,8 +141,7 @@ class TypePromotionTransaction; /// promotion for the current function. InstrToOrigTy PromotedInsts; - /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to - /// be updated. + /// ModifiedDT - If CFG is modified in anyway. bool ModifiedDT; /// OptSize - True if optimizing for size. @@ -156,8 +159,8 @@ class TypePromotionTransaction; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addRequired<TargetLibraryInfo>(); - AU.addRequired<TargetTransformInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); } private: @@ -181,8 +184,9 @@ class TypePromotionTransaction; bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl<Instruction *> &Exts, - unsigned CreatedInst); + unsigned CreatedInstCost); bool splitBranchCondition(Function &F); + bool simplifyOffsetableRelocate(Instruction &I); }; } @@ -205,14 +209,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) { ModifiedDT = false; if (TM) - TLI = TM->getSubtargetImpl()->getTargetLowering(); - TLInfo = &getAnalysis<TargetLibraryInfo>(); - TTI = &getAnalysis<TargetTransformInfo>(); - DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DT = DTWP ? &DTWP->getDomTree() : nullptr; - OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize); + TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + OptSize = F.hasFnAttribute(Attribute::OptimizeForSize); /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. 
@@ -248,9 +248,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { BasicBlock *BB = I++; bool ModifiedDTOnIteration = false; MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration); - + // Restart BB iteration if the dominator tree of the Function was changed - ModifiedDT |= ModifiedDTOnIteration; if (ModifiedDTOnIteration) break; } @@ -293,13 +292,18 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (EverMadeChange || MadeChange) MadeChange |= EliminateFallThrough(F); - if (MadeChange) - ModifiedDT = true; EverMadeChange |= MadeChange; } - if (ModifiedDT && DT) - DT->recalculate(F); + if (!DisableGCOpts) { + SmallVector<Instruction *, 2> Statepoints; + for (BasicBlock &BB : F) + for (Instruction &I : BB) + if (isStatepoint(I)) + Statepoints.push_back(&I); + for (auto &I : Statepoints) + EverMadeChange |= simplifyOffsetableRelocate(*I); + } return EverMadeChange; } @@ -326,7 +330,7 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) { // Remember if SinglePred was the entry block of the function. // If so, we will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(BB, this); + MergeBasicBlockIntoOnlyPred(BB, nullptr); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); @@ -466,7 +470,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(DestBB, this); + MergeBasicBlockIntoOnlyPred(DestBB, nullptr); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); @@ -508,19 +512,188 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. 
BB->replaceAllUsesWith(DestBB); - if (DT && !ModifiedDT) { - BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); - BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); - BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); - DT->changeImmediateDominator(DestBB, NewIDom); - DT->eraseNode(BB); - } BB->eraseFromParent(); ++NumBlocksElim; DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } +// Computes a map of base pointer relocation instructions to corresponding +// derived pointer relocation instructions given a vector of all relocate calls +static void computeBaseDerivedRelocateMap( + const SmallVectorImpl<User *> &AllRelocateCalls, + DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> & + RelocateInstMap) { + // Collect information in two maps: one primarily for locating the base object + // while filling the second map; the second map is the final structure holding + // a mapping between Base and corresponding Derived relocate calls + DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap; + for (auto &U : AllRelocateCalls) { + GCRelocateOperands ThisRelocate(U); + IntrinsicInst *I = cast<IntrinsicInst>(U); + auto K = std::make_pair(ThisRelocate.getBasePtrIndex(), + ThisRelocate.getDerivedPtrIndex()); + RelocateIdxMap.insert(std::make_pair(K, I)); + } + for (auto &Item : RelocateIdxMap) { + std::pair<unsigned, unsigned> Key = Item.first; + if (Key.first == Key.second) + // Base relocation: nothing to insert + continue; + + IntrinsicInst *I = Item.second; + auto BaseKey = std::make_pair(Key.first, Key.first); + + // We're iterating over RelocateIdxMap so we cannot modify it. + auto MaybeBase = RelocateIdxMap.find(BaseKey); + if (MaybeBase == RelocateIdxMap.end()) + // TODO: We might want to insert a new base object relocate and gep off + // that, if there are enough derived object relocates. + continue; + + RelocateInstMap[MaybeBase->second].push_back(I); + } +} + +// Accepts a GEP and extracts the operands into a vector provided they're all +// small integer constants +static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, + SmallVectorImpl<Value *> &OffsetV) { + for (unsigned i = 1; i < GEP->getNumOperands(); i++) { + // Only accept small constant integer operands + auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i)); + if (!Op || Op->getZExtValue() > 20) + return false; + } + + for (unsigned i = 1; i < GEP->getNumOperands(); i++) + OffsetV.push_back(GEP->getOperand(i)); + return true; +} + +// Takes a RelocatedBase (base pointer relocation instruction) and Targets to +// replace, computes a replacement, and applies it. +static bool +simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, + const SmallVectorImpl<IntrinsicInst *> &Targets) { + bool MadeChange = false; + for (auto &ToReplace : Targets) { + GCRelocateOperands MasterRelocate(RelocatedBase); + GCRelocateOperands ThisRelocate(ToReplace); + + assert(ThisRelocate.getBasePtrIndex() == MasterRelocate.getBasePtrIndex() && + "Not relocating a derived object of the original base object"); + if (ThisRelocate.getBasePtrIndex() == ThisRelocate.getDerivedPtrIndex()) { + // A duplicate relocate call. TODO: coalesce duplicates.
+ continue; + } + + Value *Base = ThisRelocate.getBasePtr(); + auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr()); + if (!Derived || Derived->getPointerOperand() != Base) + continue; + + SmallVector<Value *, 2> OffsetV; + if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV)) + continue; + + // Create a Builder and replace the target callsite with a gep + assert(RelocatedBase->getNextNode() && "Should always have one since it's not a terminator"); + + // Insert after RelocatedBase + IRBuilder<> Builder(RelocatedBase->getNextNode()); + Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); + + // If gc_relocate does not match the actual type, cast it to the right type. + // In theory, there must be a bitcast after gc_relocate if the type does not + // match, and we should reuse it to get the derived pointer. But there could be + // cases like this: + // bb1: + // ... + // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) + // br label %merge + // + // bb2: + // ... + // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) + // br label %merge + // + // merge: + // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ] + // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)* + // + // In this case, we cannot find the bitcast anymore. So we insert a new bitcast + // whether or not there is already one. In this way, we can handle all cases, and + // the extra bitcast should be optimized away in later passes. + Instruction *ActualRelocatedBase = RelocatedBase; + if (RelocatedBase->getType() != Base->getType()) { + ActualRelocatedBase = + cast<Instruction>(Builder.CreateBitCast(RelocatedBase, Base->getType())); + } + Value *Replacement = Builder.CreateGEP( + Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV)); + Instruction *ReplacementInst = cast<Instruction>(Replacement); + Replacement->takeName(ToReplace); + // If the newly generated derived pointer's type does not match the original derived + // pointer's type, cast the new derived pointer to match it. Same reasoning as above. + Instruction *ActualReplacement = ReplacementInst; + if (ReplacementInst->getType() != ToReplace->getType()) { + ActualReplacement = + cast<Instruction>(Builder.CreateBitCast(ReplacementInst, ToReplace->getType())); + } + ToReplace->replaceAllUsesWith(ActualReplacement); + ToReplace->eraseFromParent(); + + MadeChange = true; + } + return MadeChange; +} + +// Turns this: +// +// %base = ... +// %ptr = gep %base + 15 +// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) +// %base' = relocate(%tok, i32 4, i32 4) +// %ptr' = relocate(%tok, i32 4, i32 5) +// %val = load %ptr' +// +// into this: +// +// %base = ...
+// %ptr = gep %base + 15 +// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) +// %base' = gc.relocate(%tok, i32 4, i32 4) +// %ptr' = gep %base' + 15 +// %val = load %ptr' +bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { + bool MadeChange = false; + SmallVector<User *, 2> AllRelocateCalls; + + for (auto *U : I.users()) + if (isGCRelocate(dyn_cast<Instruction>(U))) + // Collect all the relocate calls associated with a statepoint + AllRelocateCalls.push_back(U); + + // We need at least one base pointer relocation + one derived pointer + // relocation to mangle + if (AllRelocateCalls.size() < 2) + return false; + + // RelocateInstMap is a mapping from the base relocate instruction to the + // corresponding derived relocate instructions + DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap; + computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap); + if (RelocateInstMap.empty()) + return false; + + for (auto &Item : RelocateInstMap) + // Item.first is the RelocatedBase to offset against + // Item.second is the vector of Targets to replace + MadeChange = simplifyRelocatesOffABase(Item.first, Item.second); + return MadeChange; +} + /// SinkCast - Sink the specified cast instruction into its user blocks static bool SinkCast(CastInst *CI) { BasicBlock *DefBB = CI->getParent(); @@ -555,11 +728,11 @@ static bool SinkCast(CastInst *CI) { InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", InsertPt); - MadeChange = true; } // Replace a use of the cast with a use of the new cast. TheUse = InsertedCast; + MadeChange = true; ++NumCastUses; } @@ -609,13 +782,60 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ return SinkCast(CI); } -/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce +/// CombineUAddWithOverflow - try to combine CI into a call to the +/// llvm.uadd.with.overflow intrinsic if possible. +/// +/// Return true if any changes were made. +static bool CombineUAddWithOverflow(CmpInst *CI) { + Value *A, *B; + Instruction *AddI; + if (!match(CI, + m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI)))) + return false; + + Type *Ty = AddI->getType(); + if (!isa<IntegerType>(Ty)) + return false; + + // We don't want to move around uses of condition values this late, so we + // check if it is legal to create the call to the intrinsic in the basic + // block containing the icmp: + + if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse()) + return false; + +#ifndef NDEBUG + // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption + // for now: + if (AddI->hasOneUse()) + assert(*AddI->user_begin() == CI && "expected!"); +#endif + + Module *M = CI->getParent()->getParent()->getParent(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); + + auto *InsertPt = AddI->hasOneUse() ? CI : AddI; + + auto *UAddWithOverflow = + CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt); + auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt); + auto *Overflow = + ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt); + + CI->replaceAllUsesWith(Overflow); + AddI->replaceAllUsesWith(UAdd); + CI->eraseFromParent(); + AddI->eraseFromParent(); + return true; +} + +/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce /// the number of virtual registers that must be created and coalesced.
This is /// a clear win except on targets with multiple condition code registers /// (PowerPC), where it might lose; some adjustment may be wanted there. /// /// Return true if any changes are made. -static bool OptimizeCmpExpression(CmpInst *CI) { +static bool SinkCmpExpression(CmpInst *CI) { BasicBlock *DefBB = CI->getParent(); /// InsertedCmp - Only insert a cmp in each block once. @@ -649,21 +869,33 @@ static bool OptimizeCmpExpression(CmpInst *CI) { CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0), CI->getOperand(1), "", InsertPt); - MadeChange = true; } // Replace a use of the cmp with a use of the new cmp. TheUse = InsertedCmp; + MadeChange = true; ++NumCmpUses; } // If we removed all uses, nuke the cmp. - if (CI->use_empty()) + if (CI->use_empty()) { CI->eraseFromParent(); + MadeChange = true; + } return MadeChange; } +static bool OptimizeCmpExpression(CmpInst *CI) { + if (SinkCmpExpression(CI)) + return true; + + if (CombineUAddWithOverflow(CI)) + return true; + + return false; +} + /// isExtractBitsCandidateUse - Check if the candidates could /// be combined with shift instruction, which includes: /// 1. Truncate instruction @@ -943,8 +1175,9 @@ static void ScalarizeMaskedLoad(CallInst *CI) { // CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); Builder.SetInsertPoint(InsertPt); - - Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx)); + + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); LoadInst* Load = Builder.CreateLoad(Gep, false); VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); @@ -1038,7 +1271,8 @@ static void ScalarizeMaskedStore(CallInst *CI) { Builder.SetInsertPoint(InsertPt); Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); - Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx)); + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); Builder.CreateStore(OneElt, Gep); // Create "else" block, fill it in the next iteration @@ -1072,6 +1306,54 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return true; } + const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; + + // Align the pointer arguments to this call if the target thinks it's a good + // idea + unsigned MinSize, PrefAlign; + if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { + for (auto &Arg : CI->arg_operands()) { + // We want to align both objects whose address is used directly and + // objects whose address is used in casts and GEPs, though it only makes + // sense for GEPs if the offset is a multiple of the desired alignment and + // if size - offset meets the size threshold. + if (!Arg->getType()->isPointerTy()) + continue; + APInt Offset(TD->getPointerSizeInBits( + cast<PointerType>(Arg->getType())->getAddressSpace()), 0); + Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset); + uint64_t Offset2 = Offset.getLimitedValue(); + if ((Offset2 & (PrefAlign-1)) != 0) + continue; + AllocaInst *AI; + if ((AI = dyn_cast<AllocaInst>(Val)) && + AI->getAlignment() < PrefAlign && + TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) + AI->setAlignment(PrefAlign); + // Global variables can only be aligned if they are defined in this + // object (i.e. they are uniquely initialized in this object), and + // over-aligning global variables that have an explicit section is + // forbidden. 
+ GlobalVariable *GV; + if ((GV = dyn_cast<GlobalVariable>(Val)) && + GV->hasUniqueInitializer() && + !GV->hasSection() && + GV->getAlignment() < PrefAlign && + TD->getTypeAllocSize( + GV->getType()->getElementType()) >= MinSize + Offset2) + GV->setAlignment(PrefAlign); + } + // If this is a memcpy (or similar) then we may be able to improve the + // alignment + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { + unsigned Align = getKnownAlignment(MI->getDest(), *TD); + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) + Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD)); + if (Align > MI->getAlignment()) + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align)); + } + } + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II) { switch (II->getIntrinsicID()) { @@ -1088,8 +1370,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { WeakVH IterHandle(CurInstIterator); replaceAndRecursivelySimplify(CI, RetVal, - TLI ? TLI->getDataLayout() : nullptr, - TLInfo, ModifiedDT ? nullptr : DT); + TLInfo, nullptr); // If the iterator instruction was recursively deleted, start over at the // start of the block. @@ -1116,6 +1397,16 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { } return false; } + case Intrinsic::aarch64_stlxr: + case Intrinsic::aarch64_stxr: { + ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0)); + if (!ExtVal || !ExtVal->hasOneUse() || + ExtVal->getParent() == CI->getParent()) + return false; + // Sink a zext feeding stlxr/stxr before it, so it can be folded into it. + ExtVal->moveBefore(CI); + return true; + } } if (TLI) { @@ -1131,15 +1422,11 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { // From here on out we're working with named functions. if (!CI->getCalledFunction()) return false; - // We'll need DataLayout from here on out. - const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; - if (!TD) return false; - // Lower all default uses of _chk calls. This is very similar // to what InstCombineCalls does, but here we are only lowering calls // to fortified library functions (e.g. __memcpy_chk) that have the default // "don't know" as the objectsize. Anything else should be left alone. - FortifiedLibCallSimplifier Simplifier(TD, TLInfo, true); + FortifiedLibCallSimplifier Simplifier(TLInfo, true); if (Value *V = Simplifier.optimizeCall(CI)) { CI->replaceAllUsesWith(V); CI->eraseFromParent(); @@ -1672,7 +1959,7 @@ class TypePromotionTransaction { Inst->removeFromParent(); } - ~InstructionRemover() { delete Replacer; } + ~InstructionRemover() override { delete Replacer; } /// \brief Really remove the instruction. void commit() override { delete Inst; } @@ -1802,6 +2089,7 @@ void TypePromotionTransaction::rollback( /// This encapsulates the logic for matching the target-legal addressing modes. class AddressingModeMatcher { SmallVectorImpl<Instruction*> &AddrModeInsts; + const TargetMachine &TM; const TargetLowering &TLI; /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and @@ -1825,13 +2113,15 @@ class AddressingModeMatcher { /// always returns true. 
bool IgnoreProfitability; - AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI, - const TargetLowering &T, Type *AT, - Instruction *MI, ExtAddrMode &AM, - const SetOfInstrs &InsertedTruncs, + AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI, + const TargetMachine &TM, Type *AT, Instruction *MI, + ExtAddrMode &AM, const SetOfInstrs &InsertedTruncs, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT) - : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM), + : AddrModeInsts(AMI), TM(TM), + TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent()) + ->getTargetLowering()), + AccessTy(AT), MemoryInst(MI), AddrMode(AM), InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) { IgnoreProfitability = false; } @@ -1848,13 +2138,13 @@ public: static ExtAddrMode Match(Value *V, Type *AccessTy, Instruction *MemoryInst, SmallVectorImpl<Instruction*> &AddrModeInsts, - const TargetLowering &TLI, + const TargetMachine &TM, const SetOfInstrs &InsertedTruncs, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT) { ExtAddrMode Result; - bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy, + bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, MemoryInst, Result, InsertedTruncs, PromotedInsts, TPT).MatchAddr(V, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); @@ -1869,7 +2159,7 @@ private: ExtAddrMode &AMBefore, ExtAddrMode &AMAfter); bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); - bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion, + bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const; }; @@ -2003,7 +2293,7 @@ class TypePromotionHelper { /// \brief Utility function to promote the operand of \p Ext when this /// operand is a promotable trunc or sext or zext. /// \p PromotedInsts maps the instructions to their type before promotion. - /// \p CreatedInsts[out] contains how many non-free instructions have been + /// \p CreatedInstsCost[out] contains the cost of all instructions /// created to promote the operand of Ext. /// Newly added extensions are inserted in \p Exts. /// Newly added truncates are inserted in \p Truncs. @@ -2011,53 +2301,55 @@ class TypePromotionHelper { /// \return The promoted value which is used instead of Ext. static Value *promoteOperandForTruncAndAnyExt( Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs); + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI); /// \brief Utility function to promote the operand of \p Ext when this /// operand is promotable and is not a supported trunc or sext. /// \p PromotedInsts maps the instructions to their type before promotion. - /// \p CreatedInsts[out] contains how many non-free instructions have been + /// \p CreatedInstsCost[out] contains the cost of all the instructions /// created to promote the operand of Ext. /// Newly added extensions are inserted in \p Exts. /// Newly added truncates are inserted in \p Truncs. /// Should never be called directly. /// \return The promoted value which is used instead of Ext. 
- static Value * - promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs, bool IsSExt); + static Value *promoteOperandForOther(Instruction *Ext, + TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, + const TargetLowering &TLI, bool IsSExt); /// \see promoteOperandForOther. - static Value * - signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs) { - return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, - Truncs, true); + static Value *signExtendOperandForOther( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, + Exts, Truncs, TLI, true); } /// \see promoteOperandForOther. - static Value * - zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs) { - return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, - Truncs, false); + static Value *zeroExtendOperandForOther( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, + Exts, Truncs, TLI, false); } public: /// Type for the utility function that promotes the operand of Ext. typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs); + SmallVectorImpl<Instruction *> *Truncs, + const TargetLowering &TLI); /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate /// action to promote the operand of \p Ext instead of using Ext. /// \return NULL if no promotable action is possible with the current @@ -2174,16 +2466,18 @@ TypePromotionHelper::Action TypePromotionHelper::getAction( Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( llvm::Instruction *SExt, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs) { + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { // By construction, the operand of SExt is an instruction. Otherwise we cannot // get through it and this method should not be called. Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); Value *ExtVal = SExt; + bool HasMergedNonFreeExt = false; if (isa<ZExtInst>(SExtOpnd)) { // Replace s|zext(zext(opnd)) // => zext(opnd).
+ HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd); Value *ZExt = TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType()); TPT.replaceAllUsesWith(SExt, ZExt); @@ -2194,7 +2488,7 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( // => z|sext(opnd). TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); } - CreatedInsts = 0; + CreatedInstsCost = 0; // Remove dead code. if (SExtOpnd->use_empty()) @@ -2203,8 +2497,11 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( // Check if the extension is still needed. Instruction *ExtInst = dyn_cast<Instruction>(ExtVal); if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { - if (ExtInst && Exts) - Exts->push_back(ExtInst); + if (ExtInst) { + if (Exts) + Exts->push_back(ExtInst); + CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt; + } return ExtVal; } @@ -2217,13 +2514,14 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( Value *TypePromotionHelper::promoteOperandForOther( Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) { + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI, + bool IsSExt) { // By construction, the operand of Ext is an instruction. Otherwise we cannot // get through it and this method should not be called. Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0)); - CreatedInsts = 0; + CreatedInstsCost = 0; if (!ExtOpnd->hasOneUse()) { // ExtOpnd will be promoted. // All its uses, but Ext, will need to use a truncated value of the @@ -2298,7 +2596,6 @@ Value *TypePromotionHelper::promoteOperandForOther( continue; } ExtForOpnd = cast<Instruction>(ValForExtOpnd); - ++CreatedInsts; } if (Exts) Exts->push_back(ExtForOpnd); @@ -2307,6 +2604,7 @@ Value *TypePromotionHelper::promoteOperandForOther( // Move the sign extension before the insertion point. TPT.moveBefore(ExtForOpnd, ExtOpnd); TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd); + CreatedInstsCost += !TLI.isExtFree(ExtForOpnd); // If more sext are required, new instructions will have to be created. ExtForOpnd = nullptr; } @@ -2319,22 +2617,22 @@ Value *TypePromotionHelper::promoteOperandForOther( /// IsPromotionProfitable - Check whether or not promoting an instruction /// to a wider type was profitable. -/// \p MatchedSize gives the number of instructions that have been matched -/// in the addressing mode after the promotion was applied. -/// \p SizeWithPromotion gives the number of created instructions for -/// the promotion plus the number of instructions that have been -/// matched in the addressing mode before the promotion. +/// \p NewCost gives the cost of extension instructions created by the +/// promotion. +/// \p OldCost gives the cost of extension instructions before the promotion +/// plus the number of instructions that have been +/// matched in the addressing mode before the promotion. /// \p PromotedOperand is the value that has been promoted. /// \return True if the promotion is profitable, false otherwise. -bool -AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize, - unsigned SizeWithPromotion, - Value *PromotedOperand) const { - // We folded less instructions than what we created to promote the operand.
+bool AddressingModeMatcher::IsPromotionProfitable( + unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { + DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n'); + // The cost of the new extensions is greater than the cost of the + // old extension plus what we folded. // This is not profitable. - if (MatchedSize < SizeWithPromotion) + if (NewCost > OldCost) return false; - if (MatchedSize > SizeWithPromotion) + if (NewCost < OldCost) return true; // The promotion is neutral but it may help folding the sign extension in // loads for instance. @@ -2374,7 +2672,6 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return MatchAddr(AddrInst->getOperand(0), Depth); return false; case Instruction::BitCast: - case Instruction::AddrSpaceCast: // BitCast is always a noop, and we can handle it as long as it is // int->int or pointer->pointer (we don't want int<->fp or something). if ((AddrInst->getOperand(0)->getType()->isPointerTy() || @@ -2385,6 +2682,14 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, AddrInst->getOperand(0)->getType() != AddrInst->getType()) return MatchAddr(AddrInst->getOperand(0), Depth); return false; + case Instruction::AddrSpaceCast: { + unsigned SrcAS + = AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); + unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); + if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + return MatchAddr(AddrInst->getOperand(0), Depth); + return false; + } case Instruction::Add: { // Check to see if we can merge in the RHS then the LHS. If so, we win. ExtAddrMode BackupAddrMode = AddrMode; @@ -2532,9 +2837,10 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - unsigned CreatedInsts = 0; + unsigned CreatedInstsCost = 0; + unsigned ExtCost = !TLI.isExtFree(Ext); Value *PromotedOperand = - TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr); + TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI); // SExt has been moved away. // Thus either it will be rematched later in the recursive calls or it is // gone. Anyway, we must not fold it into the addressing mode at this point. @@ -2556,7 +2862,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, unsigned OldSize = AddrModeInsts.size(); if (!MatchAddr(PromotedOperand, Depth) || - !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts, + // The total of the new cost is equal to the cost of the created + // instructions. + // The total of the old cost is equal to the cost of the extension plus + // what we have saved in the addressing mode. + !IsPromotionProfitable(CreatedInstsCost, + ExtCost + (AddrModeInsts.size() - OldSize), PromotedOperand)) { AddrMode = BackupAddrMode; AddrModeInsts.resize(OldSize); @@ -2658,13 +2969,17 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { /// inline asm call are due to memory operands. If so, return true, otherwise /// return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, - const TargetLowering &TLI) { - TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI)); + const TargetMachine &TM) { + const Function *F = CI->getParent()->getParent(); + const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering(); + const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo(); + TargetLowering::AsmOperandInfoVector TargetConstraints = + TLI->ParseConstraints(TRI, ImmutableCallSite(CI)); for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue()); + TLI->ComputeConstraintToUse(OpInfo, SDValue()); // If this asm operand is our Value*, and if it isn't an indirect memory // operand, we can't fold it! @@ -2680,10 +2995,10 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, /// FindAllMemoryUses - Recursively walk all the uses of I until we find a /// memory use. If we find an obviously non-foldable instruction, return true. /// Add the ultimately found memory instructions to MemoryUses. -static bool FindAllMemoryUses(Instruction *I, - SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses, - SmallPtrSetImpl<Instruction*> &ConsideredInsts, - const TargetLowering &TLI) { +static bool FindAllMemoryUses( + Instruction *I, + SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses, + SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) { // If we already considered this instruction, we're done. if (!ConsideredInsts.insert(I).second) return false; @@ -2713,12 +3028,12 @@ static bool FindAllMemoryUses(Instruction *I, if (!IA) return true; // If this is a memory operand, we're cool, otherwise bail out. - if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) + if (!IsOperandAMemoryOperand(CI, IA, I, TM)) return true; continue; } - if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI)) + if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM)) return true; } @@ -2806,7 +3121,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // uses. SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; SmallPtrSet<Instruction*, 16> ConsideredInsts; - if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI)) + if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM)) return false; // Has a non-memory, non-foldable use! // Now that we know that all uses of this instruction are part of a chain of @@ -2831,7 +3146,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode Result; TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, + AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, MemoryInst, Result, InsertedTruncs, PromotedInsts, TPT); Matcher.IgnoreProfitability = true; @@ -2906,15 +3221,15 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // For a PHI node, push all of its incoming values. 
if (PHINode *P = dyn_cast<PHINode>(V)) { - for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) - worklist.push_back(P->getIncomingValue(i)); + for (Value *IncValue : P->incoming_values()) + worklist.push_back(IncValue); continue; } // For non-PHIs, determine the addressing mode being computed. SmallVector<Instruction*, 16> NewAddrModeInsts; ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( - V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet, + V, AccessTy, MemoryInst, NewAddrModeInsts, *TM, InsertedTruncsSet, PromotedInsts, TPT); // This check is broken into two cases with very similar code to avoid using @@ -2989,8 +3304,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, << *MemoryInst << "\n"); if (SunkAddr->getType() != Addr->getType()) SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); - } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && - TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) { + } else if (AddrSinkUsingGEPs || + (!AddrSinkUsingGEPs.getNumOccurrences() && TM && + TM->getSubtargetImpl(*MemoryInst->getParent()->getParent()) + ->useAA())) { // By default, we use the GEP-based method when AA is used later. This // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " @@ -3041,7 +3358,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, return false; } else { Type *I8PtrTy = - Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); + Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); + Type *I8Ty = Builder.getInt8Ty(); // Start with the base register. Do this first so that subsequent address // matching finds it last, which will prevent it from trying to match it @@ -3093,7 +3411,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // SDAG consecutive load/store merging. 
if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); - ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); } ResultIndex = V; @@ -3104,7 +3422,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } else { if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); - SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); } if (SunkAddr->getType() != Addr->getType()) @@ -3213,8 +3531,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { bool MadeChange = false; + const TargetRegisterInfo *TRI = + TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo(); TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI->ParseConstraints(CS); + TargetConstraints = TLI->ParseConstraints(TRI, CS); unsigned ArgNo = 0; for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; @@ -3308,7 +3628,7 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl<Instruction *> &Exts, - unsigned CreatedInsts = 0) { + unsigned CreatedInstsCost = 0) { // Iterate over all the extensions to see if one forms an ext(load). for (auto I : Exts) { // Check if we directly have ext(load). @@ -3330,10 +3650,11 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); SmallVector<Instruction *, 4> NewExts; - unsigned NewCreatedInsts = 0; + unsigned NewCreatedInstsCost = 0; + unsigned ExtCost = !TLI->isExtFree(I); // Promote. - Value *PromotedVal = - TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr); + Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost, + &NewExts, nullptr, *TLI); assert(PromotedVal && "TypePromotionHelper should have filtered out those cases"); @@ -3343,9 +3664,10 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, // With exactly 2, the transformation is neutral, because we will merge // one extension but leave one. However, we optimistically keep going, // because the new extension may be removed too. - unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts; + long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost; + TotalCreatedInstsCost -= ExtCost; if (!StressExtLdPromotion && - (TotalCreatedInsts > 1 || + (TotalCreatedInstsCost > 1 || !isPromotedInstructionLegal(*TLI, PromotedVal))) { // The promotion is not profitable; roll back to the previous state. TPT.rollback(LastKnownGood); @@ -3353,8 +3675,8 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, } // The promotion is profitable. // Check if it exposes an ext(load). - (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts); - if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 || + (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost); + if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || // If we have created a new extension, i.e., now we have two // extensions.
We must make sure one of them is merged with // the load, otherwise we may degrade the code quality. @@ -3969,148 +4291,6 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted); } -// See if we can speculate calls to intrinsic cttz/ctlz. -// -// Example: -// entry: -// ... -// %cmp = icmp eq i64 %val, 0 -// br i1 %cmp, label %end.bb, label %then.bb -// -// then.bb: -// %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 true) -// br label %EndBB -// -// end.bb: -// %cond = phi i64 [ %c, %then.bb ], [ 64, %entry ] -// -// ==> -// -// entry: -// ... -// %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 false) -// -static bool OptimizeBranchInst(BranchInst *BrInst, const TargetLowering &TLI) { - assert(BrInst->isConditional() && "Expected a conditional branch!"); - BasicBlock *ThenBB = BrInst->getSuccessor(1); - BasicBlock *EndBB = BrInst->getSuccessor(0); - - // See if ThenBB contains only one instruction (excluding the - // terminator and DbgInfoIntrinsic calls). - IntrinsicInst *II = nullptr; - CastInst *CI = nullptr; - for (BasicBlock::iterator I = ThenBB->begin(), - E = std::prev(ThenBB->end()); I != E; ++I) { - // Skip debug info. - if (isa<DbgInfoIntrinsic>(I)) - continue; - - // Check if this is a zero extension or a truncate of a previously - // matched call to intrinsic cttz/ctlz. - if (II) { - // Early exit if we already found a "free" zero extend/truncate. - if (CI) - return false; - - Type *SrcTy = II->getType(); - Type *DestTy = I->getType(); - Value *V; - - if (match(cast<Instruction>(I), m_ZExt(m_Value(V))) && V == II) { - // Speculate this zero extend only if it is "free" for the target. - if (TLI.isZExtFree(SrcTy, DestTy)) { - CI = cast<CastInst>(I); - continue; - } - } else if (match(cast<Instruction>(I), m_Trunc(m_Value(V))) && V == II) { - // Speculate this truncate only if it is "free" for the target. - if (TLI.isTruncateFree(SrcTy, DestTy)) { - CI = cast<CastInst>(I); - continue; - } - } else { - // Avoid speculating more than one instruction. - return false; - } - } - - // See if this is a call to intrinsic cttz/ctlz. - if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::cttz>())) { - // Avoid speculating expensive intrinsic calls. - if (!TLI.isCheapToSpeculateCttz()) - return false; - } - else if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::ctlz>())) { - // Avoid speculating expensive intrinsic calls. - if (!TLI.isCheapToSpeculateCtlz()) - return false; - } else - return false; - - II = cast<IntrinsicInst>(I); - } - - // Look for PHI nodes with 'II' as the incoming value from 'ThenBB'. - BasicBlock *EntryBB = BrInst->getParent(); - for (BasicBlock::iterator I = EndBB->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) { - Value *ThenV = PN->getIncomingValueForBlock(ThenBB); - Value *OrigV = PN->getIncomingValueForBlock(EntryBB); - - if (!OrigV) - return false; - - if (ThenV != II && (!CI || ThenV != CI)) - return false; - - if (ConstantInt *CInt = dyn_cast<ConstantInt>(OrigV)) { - unsigned BitWidth = II->getType()->getIntegerBitWidth(); - - // Don't try to simplify this phi node if 'ThenV' is a cttz/ctlz - // intrinsic call, but 'OrigV' is not equal to the 'size-of' in bits - // of the value in input to the cttz/ctlz. - if (CInt->getValue() != BitWidth) - return false; - - // Hoist the call to cttz/ctlz from ThenBB into EntryBB. 
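Stepping back to the ExtLdPromotion hunk further up: the bookkeeping there moves from counting created instructions to a cost, where an extension the target can fold (isExtFree) costs nothing. A condensed, hypothetical form of the profitability test; the legality check through isPromotedInstructionLegal is elided here:

    // Keep the promotion only when its net cost stays at or below one
    // instruction, or when stress testing forces the transform.
    static bool keepPromotion(long long CreatedInstsCost,
                              long long NewCreatedInstsCost,
                              unsigned ExtCost, bool Stress) {
      long long Total = CreatedInstsCost + NewCreatedInstsCost;
      Total -= ExtCost; // merging the ext(load) pays for one extension
      return Stress || Total <= 1;
    }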
- EntryBB->getInstList().splice(BrInst, ThenBB->getInstList(), - ThenBB->begin(), std::prev(ThenBB->end())); - - // Update PN setting ThenV as the incoming value from both 'EntryBB' - // and 'ThenBB'. Eventually, method 'OptimizeInst' will fold this - // phi node if all the incoming values are the same. - PN->setIncomingValue(PN->getBasicBlockIndex(EntryBB), ThenV); - PN->setIncomingValue(PN->getBasicBlockIndex(ThenBB), ThenV); - - // Clear the 'undef on zero' flag of the cttz/ctlz intrinsic call. - if (cast<ConstantInt>(II->getArgOperand(1))->isOne()) { - Type *Ty = II->getArgOperand(0)->getType(); - Value *Args[] = { II->getArgOperand(0), - ConstantInt::getFalse(II->getContext()) }; - Module *M = EntryBB->getParent()->getParent(); - Value *IF = Intrinsic::getDeclaration(M, II->getIntrinsicID(), Ty); - IRBuilder<> Builder(II); - Instruction *NewI = Builder.CreateCall(IF, Args); - - // Replace the old call to cttz/ctlz. - II->replaceAllUsesWith(NewI); - II->eraseFromParent(); - } - - // Update BrInst condition so that the branch to EndBB is always taken. - // Later on, method 'ConstantFoldTerminator' will simplify this branch - // replacing it with a direct branch to 'EndBB'. - // As a side effect, CodeGenPrepare will attempt to simplify the control - // flow graph by deleting basic block 'ThenBB' and merging 'EntryBB' into - // 'EndBB' (calling method 'EliminateFallThrough'). - BrInst->setCondition(ConstantInt::getTrue(BrInst->getContext())); - return true; - } - } - - return false; -} - /// Some targets can do store(extractelement) with one instruction. /// Try to push the extractelement towards the stores when the target /// has this feature and this is profitable. @@ -4171,8 +4351,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { // It is possible for very late stage optimizations (such as SimplifyCFG) // to introduce PHI nodes too late to be cleaned up. If we detect such a // trivial PHI, go ahead and zap it here. - if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr, - TLInfo, DT)) { + const DataLayout &DL = I->getModule()->getDataLayout(); + if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) { P->replaceAllUsesWith(V); P->eraseFromParent(); ++NumPHIsElim; @@ -4263,34 +4443,6 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { if (isa<ExtractElementInst>(I)) return OptimizeExtractElementInst(I); - if (BranchInst *BI = dyn_cast<BranchInst>(I)) { - if (TLI && BI->isConditional() && BI->getCondition()->hasOneUse()) { - // Check if the branch condition compares a value agaist zero. - if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) { - if (ICI->getPredicate() == ICmpInst::ICMP_EQ && - match(ICI->getOperand(1), m_Zero())) { - BasicBlock *ThenBB = BI->getSuccessor(1); - BasicBlock *EndBB = BI->getSuccessor(0); - - // Check if ThenBB is only reachable from this basic block; also, - // check if EndBB has more than one predecessor. - if (ThenBB->getSinglePredecessor() && - !EndBB->getSinglePredecessor()) { - TerminatorInst *TI = ThenBB->getTerminator(); - - if (TI->getNumSuccessors() == 1 && TI->getSuccessor(0) == EndBB && - // Try to speculate calls to intrinsic cttz/ctlz from 'ThenBB'. - OptimizeBranchInst(BI, *TLI)) { - ModifiedDT = true; - return true; - } - } - } - } - } - return false; - } - return false; } @@ -4469,8 +4621,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { /// FIXME: Remove the (equivalent?) implementation in SelectionDAG. 
/// bool CodeGenPrepare::splitBranchCondition(Function &F) { - if (!TM || TM->Options.EnableFastISel != true || - !TLI || TLI->isJumpExpensive()) + if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive()) return false; bool MadeChange = false; @@ -4631,10 +4782,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { } } - // Request DOM Tree update. // Note: No point in getting fancy here, since the DT info is never - // available to CodeGenPrepare and the existing update code is broken - // anyways. + // available to CodeGenPrepare. ModifiedDT = true; MadeChange = true; diff --git a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp new file mode 100644 index 0000000..28c97ba --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp @@ -0,0 +1,54 @@ +//===-- CoreCLRGC.cpp - CoreCLR Runtime GC Strategy -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a GCStrategy for the CoreCLR Runtime. +// The strategy is similar to Statepoint-example GC, but differs from it in +// certain aspects, such as: +// 1) Base-pointers need not be explicitly tracked and reported for +// interior pointers +// 2) Uses a different format for encoding stack-maps +// 3) Location of Safe-point polls: polls are only needed before loop-back edges +// and before tail-calls (not needed at function-entry) +// +// The above differences in behavior are to be implemented in upcoming checkins. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Value.h" + +using namespace llvm; + +namespace { +class CoreCLRGC : public GCStrategy { +public: + CoreCLRGC() { + UseStatepoints = true; + // These options are all gc.root specific, we specify them so that the + // gc.root lowering code doesn't run. + InitRoots = false; + NeededSafePoints = 0; + UsesMetadata = false; + CustomRoots = false; + } + Optional<bool> isGCManagedPointer(const Value *V) const override { + // Method is only valid on pointer typed values. + PointerType *PT = cast<PointerType>(V->getType()); + // We pick addrspace(1) as our GC managed heap. + return (1 == PT->getAddressSpace()); + } +}; +} + +static GCRegistry::Add<CoreCLRGC> X("coreclr", "CoreCLR-compatible GC"); + +namespace llvm { +void linkCoreCLRGC() {} +} diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h index ceef74d..af011a0 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h @@ -69,7 +69,7 @@ class TargetRegisterInfo; public: CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); - ~CriticalAntiDepBreaker(); + ~CriticalAntiDepBreaker() override; /// Initialize anti-dep breaking for a new basic block. 
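Before leaving the new CoreCLRGC.cpp above: a GCStrategy subclass becomes selectable once registered, keyed by the function-level gc attribute. A hypothetical registration along the same lines; MyStatepointGC and the "my-gc" name are made up for illustration:

    #include "llvm/CodeGen/GCStrategy.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Value.h"
    using namespace llvm;

    namespace {
    class MyStatepointGC : public GCStrategy {
    public:
      MyStatepointGC() {
        UseStatepoints = true; // rely on gc.statepoint, not gc.root
        InitRoots = false;     // keep gc.root lowering switched off
      }
      Optional<bool> isGCManagedPointer(const Value *V) const override {
        // Same convention as CoreCLRGC: addrspace(1) is the managed heap.
        PointerType *PT = cast<PointerType>(V->getType());
        return PT->getAddressSpace() == 1;
      }
    };
    }

    // Functions carrying `gc "my-gc"` are now handed this strategy.
    static GCRegistry::Add<MyStatepointGC> Y("my-gc", "illustrative GC");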
void StartBlock(MachineBasicBlock *BB) override; diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index c17a35d..963d573 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -65,7 +65,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { // Don't delete instructions with side effects. bool SawStore = false; - if (!MI->isSafeToMove(TII, nullptr, SawStore) && !MI->isPHI()) + if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) return false; // Examine each operand. diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp index 75b74d9..42656fb 100644 --- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -13,19 +13,19 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/IR/CallSite.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Pass.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "dwarfehprepare" @@ -39,15 +39,28 @@ namespace { // RewindFunction - _Unwind_Resume or the target equivalent. Constant *RewindFunction; + DominatorTree *DT; + const TargetLowering *TLI; + bool InsertUnwindResumeCalls(Function &Fn); Value *GetExceptionObject(ResumeInst *RI); + size_t + pruneUnreachableResumes(Function &Fn, + SmallVectorImpl<ResumeInst *> &Resumes, + SmallVectorImpl<LandingPadInst *> &CleanupLPads); public: static char ID; // Pass identification, replacement for typeid. + + // INITIALIZE_TM_PASS requires a default constructor, but it isn't used in + // practice. 
+ DwarfEHPrepare() + : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr), DT(nullptr), + TLI(nullptr) {} + DwarfEHPrepare(const TargetMachine *TM) - : FunctionPass(ID), TM(TM), RewindFunction(nullptr) { - initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); - } + : FunctionPass(ID), TM(TM), RewindFunction(nullptr), DT(nullptr), + TLI(nullptr) {} bool runOnFunction(Function &Fn) override; @@ -56,7 +69,7 @@ namespace { return false; } - void getAnalysisUsage(AnalysisUsage &AU) const override { } + void getAnalysisUsage(AnalysisUsage &AU) const override; const char *getPassName() const override { return "Exception handling preparation"; @@ -65,11 +78,22 @@ namespace { } // end anonymous namespace char DwarfEHPrepare::ID = 0; +INITIALIZE_TM_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare", + "Prepare DWARF exceptions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_TM_PASS_END(DwarfEHPrepare, "dwarfehprepare", + "Prepare DWARF exceptions", false, false) FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) { return new DwarfEHPrepare(TM); } +void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); +} + /// GetExceptionObject - Return the exception object from the value passed into /// the 'resume' instruction (typically an aggregate). Clean up any dead /// instructions, including the 'resume' instruction. @@ -99,34 +123,93 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { RI->eraseFromParent(); if (EraseIVIs) { - if (SelIVI->getNumUses() == 0) + if (SelIVI->use_empty()) SelIVI->eraseFromParent(); - if (ExcIVI->getNumUses() == 0) + if (ExcIVI->use_empty()) ExcIVI->eraseFromParent(); - if (SelLoad && SelLoad->getNumUses() == 0) + if (SelLoad && SelLoad->use_empty()) SelLoad->eraseFromParent(); } return ExnObj; } +/// Replace resumes that are not reachable from a cleanup landing pad with +/// unreachable and then simplify those blocks. +size_t DwarfEHPrepare::pruneUnreachableResumes( + Function &Fn, SmallVectorImpl<ResumeInst *> &Resumes, + SmallVectorImpl<LandingPadInst *> &CleanupLPads) { + BitVector ResumeReachable(Resumes.size()); + size_t ResumeIndex = 0; + for (auto *RI : Resumes) { + for (auto *LP : CleanupLPads) { + if (isPotentiallyReachable(LP, RI, DT)) { + ResumeReachable.set(ResumeIndex); + break; + } + } + ++ResumeIndex; + } + + // If everything is reachable, there is no change. + if (ResumeReachable.all()) + return Resumes.size(); + + const TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn); + LLVMContext &Ctx = Fn.getContext(); + + // Otherwise, insert unreachable instructions and call simplifycfg. + size_t ResumesLeft = 0; + for (size_t I = 0, E = Resumes.size(); I < E; ++I) { + ResumeInst *RI = Resumes[I]; + if (ResumeReachable[I]) { + Resumes[ResumesLeft++] = RI; + } else { + BasicBlock *BB = RI->getParent(); + new UnreachableInst(Ctx, RI); + RI->eraseFromParent(); + SimplifyCFG(BB, TTI, 1); + } + } + Resumes.resize(ResumesLeft); + return ResumesLeft; +} + /// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present /// into calls to the appropriate _Unwind_Resume function. 
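One detail of pruneUnreachableResumes above worth isolating: survivors are compacted in place through a write index instead of being erased one at a time, so the pass stays linear. The same idiom in generic form; compactByMask is a hypothetical name:

    #include "llvm/ADT/BitVector.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Keep Vec[I] exactly when Keep holds bit I; relative order is
    // preserved and no element is written more than once.
    template <typename T>
    static unsigned compactByMask(SmallVectorImpl<T *> &Vec,
                                  const BitVector &Keep) {
      unsigned Left = 0;
      for (unsigned I = 0, E = Vec.size(); I < E; ++I)
        if (Keep.test(I))
          Vec[Left++] = Vec[I];
      Vec.resize(Left);
      return Left;
    }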
bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { SmallVector<ResumeInst*, 16> Resumes; - for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { - TerminatorInst *TI = I->getTerminator(); - if (ResumeInst *RI = dyn_cast<ResumeInst>(TI)) + SmallVector<LandingPadInst*, 16> CleanupLPads; + bool FoundLP = false; + for (BasicBlock &BB : Fn) { + if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator())) Resumes.push_back(RI); + if (auto *LP = BB.getLandingPadInst()) { + if (LP->isCleanup()) + CleanupLPads.push_back(LP); + // Check the personality on the first landingpad. Don't do anything if + // it's for MSVC. + if (!FoundLP) { + FoundLP = true; + EHPersonality Pers = classifyEHPersonality(LP->getPersonalityFn()); + if (isMSVCEHPersonality(Pers)) + return false; + } + } } if (Resumes.empty()) return false; + LLVMContext &Ctx = Fn.getContext(); + + size_t ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads); + if (ResumesLeft == 0) + return true; // We pruned them all. + // Find the rewind function if we didn't already. - const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); if (!RewindFunction) { - LLVMContext &Ctx = Resumes[0]->getContext(); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false); const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); @@ -134,10 +217,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { } // Create the basic block where the _Unwind_Resume call will live. - LLVMContext &Ctx = Fn.getContext(); - unsigned ResumesSize = Resumes.size(); - - if (ResumesSize == 1) { + if (ResumesLeft == 1) { // Instead of creating a new BB and PHI node, just append the call to // _Unwind_Resume to the end of the single resume block. ResumeInst *RI = Resumes.front(); @@ -154,14 +234,12 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { } BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn); - PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize, + PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, "exn.obj", UnwindBB); // Extract the exception object from the ResumeInst and add it to the PHI node // that feeds the _Unwind_Resume call. - for (SmallVectorImpl<ResumeInst*>::iterator - I = Resumes.begin(), E = Resumes.end(); I != E; ++I) { - ResumeInst *RI = *I; + for (ResumeInst *RI : Resumes) { BasicBlock *Parent = RI->getParent(); BranchInst::Create(UnwindBB, Parent); @@ -181,6 +259,11 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { } bool DwarfEHPrepare::runOnFunction(Function &Fn) { + assert(TM && "DWARF EH preparation requires a target machine"); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); bool Changed = InsertUnwindResumeCalls(Fn); + DT = nullptr; + TLI = nullptr; return Changed; } diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp index 995606f..092b7f8 100644 --- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -220,7 +220,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { // We never speculate stores, so an AA pointer isn't necessary. 
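The scan above also classifies the personality of the first landing pad it sees and gives up on MSVC personalities, whose unwinding does not go through _Unwind_Resume. Pulled out as a standalone sketch; usesMSVCPersonality is a hypothetical name, and the classify helpers come from llvm/Analysis/LibCallSemantics.h, included earlier in this file:

    static bool usesMSVCPersonality(const Function &Fn) {
      for (const BasicBlock &BB : Fn)
        if (const LandingPadInst *LP = BB.getLandingPadInst())
          // The first landing pad is representative for the whole function.
          return isMSVCEHPersonality(
              classifyEHPersonality(LP->getPersonalityFn()));
      return false;
    }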
bool DontMoveAcrossStore = true; - if (!I->isSafeToMove(TII, nullptr, DontMoveAcrossStore)) { + if (!I->isSafeToMove(nullptr, DontMoveAcrossStore)) { DEBUG(dbgs() << "Can't speculate: " << *I); return false; } @@ -777,15 +777,13 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" << "********** Function: " << MF.getName() << '\n'); // Only run if conversion if the target wants it. - if (!MF.getTarget() - .getSubtarget<TargetSubtargetInfo>() - .enableEarlyIfConversion()) + const TargetSubtargetInfo &STI = MF.getSubtarget(); + if (!STI.enableEarlyIfConversion()) return false; - TII = MF.getSubtarget().getInstrInfo(); - TRI = MF.getSubtarget().getRegisterInfo(); - SchedModel = - MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel(); + TII = STI.getInstrInfo(); + TRI = STI.getRegisterInfo(); + SchedModel = STI.getSchedModel(); MRI = &MF.getRegInfo(); DomTree = &getAnalysis<MachineDominatorTree>(); Loops = getAnalysisIfAvailable<MachineLoopInfo>(); @@ -799,9 +797,8 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { // if-conversion in a single pass. The tryConvertIf() function may erase // blocks, but only blocks dominated by the head block. This makes it safe to // update the dominator tree while the post-order iterator is still active. - for (po_iterator<MachineDominatorTree*> - I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I) - if (tryConvertIf(I->getBlock())) + for (auto DomNode : post_order(DomTree)) + if (tryConvertIf(DomNode->getBlock())) Changed = true; return Changed; diff --git a/contrib/llvm/lib/CodeGen/ErlangGC.cpp b/contrib/llvm/lib/CodeGen/ErlangGC.cpp index 85b0893..024946d 100644 --- a/contrib/llvm/lib/CodeGen/ErlangGC.cpp +++ b/contrib/llvm/lib/CodeGen/ErlangGC.cpp @@ -27,56 +27,20 @@ using namespace llvm; namespace { - class ErlangGC : public GCStrategy { - MCSymbol *InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - DebugLoc DL) const; - public: - ErlangGC(); - bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) override; - }; - +class ErlangGC : public GCStrategy { +public: + ErlangGC(); +}; } -static GCRegistry::Add<ErlangGC> -X("erlang", "erlang-compatible garbage collector"); +static GCRegistry::Add<ErlangGC> X("erlang", + "erlang-compatible garbage collector"); -void llvm::linkErlangGC() { } +void llvm::linkErlangGC() {} ErlangGC::ErlangGC() { InitRoots = false; NeededSafePoints = 1 << GC::PostCall; UsesMetadata = true; CustomRoots = false; - CustomSafePoints = true; -} - -MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - DebugLoc DL) const { - const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo(); - MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); - BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); - return Label; -} - -bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) { - for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE; - ++BBI) - for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end(); - MI != ME; ++MI) - - if (MI->getDesc().isCall()) { - - // Do not treat tail call sites as safe points. - if (MI->getDesc().isTerminator()) - continue; - - /* Code copied from VisitCallPoint(...) 
*/ - MachineBasicBlock::iterator RAI = MI; ++RAI; - MCSymbol* Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc()); - FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc()); - } - - return false; } diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp index b3a22c8..5b09cf1 100644 --- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -113,7 +113,7 @@ struct DomainValue { } namespace { -/// LiveReg - Information about a live register. +/// Information about a live register. struct LiveReg { /// Value currently in this register, or NULL when no value is being tracked. /// This counts as a DomainValue reference. @@ -125,7 +125,7 @@ struct LiveReg { /// will be a negative number. int Def; }; -} // anonynous namespace +} // anonymous namespace namespace { class ExeDepsFix : public MachineFunctionPass { @@ -174,7 +174,7 @@ public: private: iterator_range<SmallVectorImpl<int>::const_iterator> - regIndizes(unsigned Reg) const; + regIndices(unsigned Reg) const; // DomainValue allocation. DomainValue *alloc(int domain = -1); @@ -205,10 +205,10 @@ private: char ExeDepsFix::ID = 0; -/// Translate TRI register number to a list of indizes into our stmaller tables +/// Translate TRI register number to a list of indices into our smaller tables /// of interesting registers. iterator_range<SmallVectorImpl<int>::const_iterator> -ExeDepsFix::regIndizes(unsigned Reg) const { +ExeDepsFix::regIndices(unsigned Reg) const { assert(Reg < AliasMap.size() && "Invalid register"); const auto &Entry = AliasMap[Reg]; return make_range(Entry.begin(), Entry.end()); @@ -225,7 +225,7 @@ DomainValue *ExeDepsFix::alloc(int domain) { return dv; } -/// release - Release a reference to DV. When the last reference is released, +/// Release a reference to DV. When the last reference is released, /// collapse if needed. void ExeDepsFix::release(DomainValue *DV) { while (DV) { @@ -245,8 +245,8 @@ void ExeDepsFix::release(DomainValue *DV) { } } -/// resolve - Follow the chain of dead DomainValues until a live DomainValue is -/// reached. Update the referenced pointer when necessary. +/// Follow the chain of dead DomainValues until a live DomainValue is reached. +/// Update the referenced pointer when necessary. DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) { DomainValue *DV = DVRef; if (!DV || !DV->Next) @@ -325,8 +325,7 @@ void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) { setLiveReg(rx, alloc(domain)); } -/// Merge - All instructions and registers in B are moved to A, and B is -/// released. +/// All instructions and registers in B are moved to A, and B is released. bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { assert(!A->isCollapsed() && "Cannot merge into collapsed"); assert(!B->isCollapsed() && "Cannot merge from collapsed"); @@ -352,7 +351,7 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { return true; } -// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values. +/// Set up LiveRegs by merging predecessor live-out values. void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Detect back-edges from predecessors we haven't processed yet. 
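An aside on the DomainValue plumbing above: resolve() chases the Next links of dead values and then rewrites the referencing pointer, a lightweight form of union-find path compression. Its core, with the retain/release bookkeeping elided; resolveChain is a hypothetical name:

    static DomainValue *resolveChain(DomainValue *&Ref) {
      DomainValue *DV = Ref;
      while (DV && DV->Next)
        DV = DV->Next; // skip over dead values
      if (DV != Ref)
        Ref = DV; // compress so later lookups through Ref are cheap
      return DV;
    }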
SeenUnknownBackEdge = false; @@ -378,7 +377,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { if (MBB->pred_empty()) { for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { - for (int rx : regIndizes(*i)) { + for (int rx : regIndices(*i)) { // Treat function live-ins as if they were defined just before the first // instruction. Usually, function arguments are set up immediately // before the call. @@ -475,7 +474,7 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) { bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, unsigned Pref) { unsigned reg = MI->getOperand(OpIdx).getReg(); - for (int rx : regIndizes(reg)) { + for (int rx : regIndices(reg)) { unsigned Clearance = CurInstr - LiveRegs[rx].Def; DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); @@ -521,7 +520,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { break; if (MO.isUse()) continue; - for (int rx : regIndizes(MO.getReg())) { + for (int rx : regIndices(MO.getReg())) { // This instruction explicitly defines rx. DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr << '\t' << *MI); @@ -587,7 +586,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { force(rx, domain); } } @@ -596,7 +595,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { kill(rx); force(rx, domain); } @@ -616,7 +615,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { DomainValue *dv = LiveRegs[rx].Value; if (dv == nullptr) continue; @@ -712,7 +711,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { ii != ee; ++ii) { MachineOperand &mo = *ii; if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { kill(rx); setLiveReg(rx, dv); diff --git a/contrib/llvm/lib/CodeGen/ForwardControlFlowIntegrity.cpp b/contrib/llvm/lib/CodeGen/ForwardControlFlowIntegrity.cpp deleted file mode 100644 index 63c3699..0000000 --- a/contrib/llvm/lib/CodeGen/ForwardControlFlowIntegrity.cpp +++ /dev/null @@ -1,374 +0,0 @@ -//===-- ForwardControlFlowIntegrity.cpp: Forward-Edge CFI -----------------===// -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief A pass that instruments code with fast checks for indirect calls and -/// hooks for a function to check violations. 
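Returning to ExecutionDepsFix for a moment: shouldBreakDependence above measures "clearance", the number of instructions since a register's last def, and only breaks the false dependence when that def is too close. Reduced to its essence; tooClose is a hypothetical name, and Def can be negative for values live into the block, which yields a large clearance, as intended:

    static bool tooClose(unsigned CurInstr, int LastDef, unsigned Pref) {
      unsigned Clearance = CurInstr - LastDef; // instructions since the def
      return Clearance < Pref; // want at least Pref instructions in between
    }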
-/// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "cfi" - -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/JumpInstrTableInfo.h" -#include "llvm/CodeGen/ForwardControlFlowIntegrity.h" -#include "llvm/CodeGen/JumpInstrTables.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InlineAsm.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -STATISTIC(NumCFIIndirectCalls, - "Number of indirect call sites rewritten by the CFI pass"); - -char ForwardControlFlowIntegrity::ID = 0; -INITIALIZE_PASS_BEGIN(ForwardControlFlowIntegrity, "forward-cfi", - "Control-Flow Integrity", true, true) -INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo); -INITIALIZE_PASS_DEPENDENCY(JumpInstrTables); -INITIALIZE_PASS_END(ForwardControlFlowIntegrity, "forward-cfi", - "Control-Flow Integrity", true, true) - -ModulePass *llvm::createForwardControlFlowIntegrityPass() { - return new ForwardControlFlowIntegrity(); -} - -ModulePass *llvm::createForwardControlFlowIntegrityPass( - JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing, - StringRef CFIFuncName) { - return new ForwardControlFlowIntegrity(JTT, CFIType, CFIEnforcing, - CFIFuncName); -} - -// Checks to see if a given CallSite is making an indirect call, including -// cases where the indirect call is made through a bitcast. -static bool isIndirectCall(CallSite &CS) { - if (CS.getCalledFunction()) - return false; - - // Check the value to see if it is merely a bitcast of a function. In - // this case, it will translate to a direct function call in the resulting - // assembly, so we won't treat it as an indirect call here. 
- const Value *V = CS.getCalledValue(); - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - return !(CE->isCast() && isa<Function>(CE->getOperand(0))); - } - - // Otherwise, since we know it's a call, it must be an indirect call - return true; -} - -static const char cfi_failure_func_name[] = "__llvm_cfi_pointer_warning"; - -ForwardControlFlowIntegrity::ForwardControlFlowIntegrity() - : ModulePass(ID), IndirectCalls(), JTType(JumpTable::Single), - CFIType(CFIntegrity::Sub), CFIEnforcing(false), CFIFuncName("") { - initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry()); -} - -ForwardControlFlowIntegrity::ForwardControlFlowIntegrity( - JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing, - std::string CFIFuncName) - : ModulePass(ID), IndirectCalls(), JTType(JTT), CFIType(CFIType), - CFIEnforcing(CFIEnforcing), CFIFuncName(CFIFuncName) { - initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry()); -} - -ForwardControlFlowIntegrity::~ForwardControlFlowIntegrity() {} - -void ForwardControlFlowIntegrity::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<JumpInstrTableInfo>(); - AU.addRequired<JumpInstrTables>(); -} - -void ForwardControlFlowIntegrity::getIndirectCalls(Module &M) { - // To get the indirect calls, we iterate over all functions and iterate over - // the list of basic blocks in each. We extract a total list of indirect calls - // before modifying any of them, since our modifications will modify the list - // of basic blocks. - for (Function &F : M) { - for (BasicBlock &BB : F) { - for (Instruction &I : BB) { - CallSite CS(&I); - if (!(CS && isIndirectCall(CS))) - continue; - - Value *CalledValue = CS.getCalledValue(); - - // Don't rewrite this instruction if the indirect call is actually just - // inline assembly, since our transformation will generate an invalid - // module in that case. - if (isa<InlineAsm>(CalledValue)) - continue; - - IndirectCalls.push_back(&I); - } - } - } -} - -void ForwardControlFlowIntegrity::updateIndirectCalls(Module &M, - CFITables &CFIT) { - Type *Int64Ty = Type::getInt64Ty(M.getContext()); - for (Instruction *I : IndirectCalls) { - CallSite CS(I); - Value *CalledValue = CS.getCalledValue(); - - // Get the function type for this call and look it up in the tables. - Type *VTy = CalledValue->getType(); - PointerType *PTy = dyn_cast<PointerType>(VTy); - Type *EltTy = PTy->getElementType(); - FunctionType *FunTy = dyn_cast<FunctionType>(EltTy); - FunctionType *TransformedTy = JumpInstrTables::transformType(JTType, FunTy); - ++NumCFIIndirectCalls; - Constant *JumpTableStart = nullptr; - Constant *JumpTableMask = nullptr; - Constant *JumpTableSize = nullptr; - - // Some call sites have function types that don't correspond to any - // address-taken function in the module. This happens when function pointers - // are passed in from external code. - auto it = CFIT.find(TransformedTy); - if (it == CFIT.end()) { - // In this case, make sure that the function pointer will change by - // setting the mask and the start to be 0 so that the transformed - // function is 0. 
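The removed getIndirectCalls above still demonstrates a sound pattern: snapshot the interesting instructions first, because the rewrites that follow split basic blocks and would invalidate live iterators. In outline, with Worklist as an illustrative name:

    SmallVector<Instruction *, 16> Worklist;
    for (Function &F : M)
      for (BasicBlock &BB : F)
        for (Instruction &I : BB) {
          CallSite CS(&I);
          if (CS && !CS.getCalledFunction()) // indirect; bitcast case elided
            Worklist.push_back(&I);
        }
    // Mutate only after the walk over M is complete.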
- JumpTableStart = ConstantInt::get(Int64Ty, 0); - JumpTableMask = ConstantInt::get(Int64Ty, 0); - JumpTableSize = ConstantInt::get(Int64Ty, 0); - } else { - JumpTableStart = it->second.StartValue; - JumpTableMask = it->second.MaskValue; - JumpTableSize = it->second.Size; - } - - rewriteFunctionPointer(M, I, CalledValue, JumpTableStart, JumpTableMask, - JumpTableSize); - } - - return; -} - -bool ForwardControlFlowIntegrity::runOnModule(Module &M) { - JumpInstrTableInfo *JITI = &getAnalysis<JumpInstrTableInfo>(); - Type *Int64Ty = Type::getInt64Ty(M.getContext()); - Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); - - // JumpInstrTableInfo stores information about the alignment of each entry. - // The alignment returned by JumpInstrTableInfo is alignment in bytes, not - // in the exponent. - ByteAlignment = JITI->entryByteAlignment(); - LogByteAlignment = llvm::Log2_64(ByteAlignment); - - // Set up tables for control-flow integrity based on information about the - // jump-instruction tables. - CFITables CFIT; - for (const auto &KV : JITI->getTables()) { - uint64_t Size = static_cast<uint64_t>(KV.second.size()); - uint64_t TableSize = NextPowerOf2(Size); - - int64_t MaskValue = ((TableSize << LogByteAlignment) - 1) & -ByteAlignment; - Constant *JumpTableMaskValue = ConstantInt::get(Int64Ty, MaskValue); - Constant *JumpTableSize = ConstantInt::get(Int64Ty, Size); - - // The base of the table is defined to be the first jumptable function in - // the table. - Function *First = KV.second.begin()->second; - Constant *JumpTableStartValue = ConstantExpr::getBitCast(First, VoidPtrTy); - CFIT[KV.first].StartValue = JumpTableStartValue; - CFIT[KV.first].MaskValue = JumpTableMaskValue; - CFIT[KV.first].Size = JumpTableSize; - } - - if (CFIT.empty()) - return false; - - getIndirectCalls(M); - - if (!CFIEnforcing) { - addWarningFunction(M); - } - - // Update the instructions with the check and the indirect jump through our - // table. - updateIndirectCalls(M, CFIT); - - return true; -} - -void ForwardControlFlowIntegrity::addWarningFunction(Module &M) { - PointerType *CharPtrTy = Type::getInt8PtrTy(M.getContext()); - - // Get the type of the Warning Function: void (i8*, i8*), - // where the first argument is the name of the function in which the violation - // occurs, and the second is the function pointer that violates CFI. - SmallVector<Type *, 2> WarningFunArgs; - WarningFunArgs.push_back(CharPtrTy); - WarningFunArgs.push_back(CharPtrTy); - FunctionType *WarningFunTy = - FunctionType::get(Type::getVoidTy(M.getContext()), WarningFunArgs, false); - - if (!CFIFuncName.empty()) { - Constant *FailureFun = M.getOrInsertFunction(CFIFuncName, WarningFunTy); - if (!FailureFun) - report_fatal_error("Could not get or insert the function specified by" - " -cfi-func-name"); - } else { - // The default warning function swallows the warning and lets the call - // continue, since there's no generic way for it to print out this - // information. 
- Function *WarningFun = M.getFunction(cfi_failure_func_name); - if (!WarningFun) { - WarningFun = - Function::Create(WarningFunTy, GlobalValue::LinkOnceAnyLinkage, - cfi_failure_func_name, &M); - } - - BasicBlock *Entry = - BasicBlock::Create(M.getContext(), "entry", WarningFun, 0); - ReturnInst::Create(M.getContext(), Entry); - } -} - -void ForwardControlFlowIntegrity::rewriteFunctionPointer( - Module &M, Instruction *I, Value *FunPtr, Constant *JumpTableStart, - Constant *JumpTableMask, Constant *JumpTableSize) { - IRBuilder<> TempBuilder(I); - - Type *OrigFunType = FunPtr->getType(); - - BasicBlock *CurBB = cast<BasicBlock>(I->getParent()); - Function *CurF = cast<Function>(CurBB->getParent()); - Type *Int64Ty = Type::getInt64Ty(M.getContext()); - - Value *TI = TempBuilder.CreatePtrToInt(FunPtr, Int64Ty); - Value *TStartInt = TempBuilder.CreatePtrToInt(JumpTableStart, Int64Ty); - - Value *NewFunPtr = nullptr; - Value *Check = nullptr; - switch (CFIType) { - case CFIntegrity::Sub: { - // This is the subtract, mask, and add version. - // Subtract from the base. - Value *Sub = TempBuilder.CreateSub(TI, TStartInt); - - // Mask the difference to force this to be a table offset. - Value *And = TempBuilder.CreateAnd(Sub, JumpTableMask); - - // Add it back to the base. - Value *Result = TempBuilder.CreateAdd(And, TStartInt); - - // Convert it back into a function pointer that we can call. - NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType); - break; - } - case CFIntegrity::Ror: { - // This is the subtract and rotate version. - // Rotate right by the alignment value. The optimizer should recognize - // this sequence as a rotation. - - // This cast is safe, since unsigned is always a subset of uint64_t. - uint64_t LogByteAlignment64 = static_cast<uint64_t>(LogByteAlignment); - Constant *RightShift = ConstantInt::get(Int64Ty, LogByteAlignment64); - Constant *LeftShift = ConstantInt::get(Int64Ty, 64 - LogByteAlignment64); - - // Subtract from the base. - Value *Sub = TempBuilder.CreateSub(TI, TStartInt); - - // Create the equivalent of a rotate-right instruction. - Value *Shr = TempBuilder.CreateLShr(Sub, RightShift); - Value *Shl = TempBuilder.CreateShl(Sub, LeftShift); - Value *Or = TempBuilder.CreateOr(Shr, Shl); - - // Perform unsigned comparison to check for inclusion in the table. - Check = TempBuilder.CreateICmpULT(Or, JumpTableSize); - NewFunPtr = FunPtr; - break; - } - case CFIntegrity::Add: { - // This is the mask and add version. - // Mask the function pointer to turn it into an offset into the table. - Value *And = TempBuilder.CreateAnd(TI, JumpTableMask); - - // Then or this offset to the base and get the pointer value. - Value *Result = TempBuilder.CreateAdd(And, TStartInt); - - // Convert it back into a function pointer that we can call. - NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType); - break; - } - } - - if (!CFIEnforcing) { - // If a check hasn't been added (in the rotation version), then check to see - // if it's the same as the original function. This check determines whether - // or not we call the CFI failure function. - if (!Check) - Check = TempBuilder.CreateICmpEQ(NewFunPtr, FunPtr); - BasicBlock *InvalidPtrBlock = - BasicBlock::Create(M.getContext(), "invalid.ptr", CurF, 0); - BasicBlock *ContinuationBB = CurBB->splitBasicBlock(I); - - // Remove the unconditional branch that connects the two blocks. - TerminatorInst *TermInst = CurBB->getTerminator(); - TermInst->eraseFromParent(); - - // Add a conditional branch that depends on the Check above. 
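The CFIntegrity::Sub sequence being deleted above reduces to three scalar operations; spelled out as plain arithmetic, the guarantee is easier to see. clampToTable is a hypothetical name:

    #include <cstdint>

    // Force a pointer onto some aligned jump-table entry in
    // [Start, Start + Mask]: control flow cannot escape the table even
    // when the pointer was corrupted.
    static uint64_t clampToTable(uint64_t Ptr, uint64_t Start, uint64_t Mask) {
      return ((Ptr - Start) & Mask) + Start;
    }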
- BranchInst::Create(ContinuationBB, InvalidPtrBlock, Check, CurBB); - - // Call the warning function for this pointer, then continue. - Instruction *BI = BranchInst::Create(ContinuationBB, InvalidPtrBlock); - insertWarning(M, InvalidPtrBlock, BI, FunPtr); - } else { - // Modify the instruction to call this value. - CallSite CS(I); - CS.setCalledFunction(NewFunPtr); - } -} - -void ForwardControlFlowIntegrity::insertWarning(Module &M, BasicBlock *Block, - Instruction *I, Value *FunPtr) { - Function *ParentFun = cast<Function>(Block->getParent()); - - // Get the function to call right before the instruction. - Function *WarningFun = nullptr; - if (CFIFuncName.empty()) { - WarningFun = M.getFunction(cfi_failure_func_name); - } else { - WarningFun = M.getFunction(CFIFuncName); - } - - assert(WarningFun && "Could not find the CFI failure function"); - - Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); - - IRBuilder<> WarningInserter(I); - // Create a mergeable GlobalVariable containing the name of the function. - Value *ParentNameGV = - WarningInserter.CreateGlobalString(ParentFun->getName()); - Value *ParentNamePtr = WarningInserter.CreateBitCast(ParentNameGV, VoidPtrTy); - Value *FunVoidPtr = WarningInserter.CreateBitCast(FunPtr, VoidPtrTy); - WarningInserter.CreateCall2(WarningFun, ParentNamePtr, FunVoidPtr); -} diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp index 6101c67..c8116a4 100644 --- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp +++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp @@ -24,22 +24,20 @@ using namespace llvm; namespace { - - class Printer : public FunctionPass { - static char ID; - raw_ostream &OS; - - public: - explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} +class Printer : public FunctionPass { + static char ID; + raw_ostream &OS; - const char *getPassName() const override; - void getAnalysisUsage(AnalysisUsage &AU) const override; +public: + explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} - bool runOnFunction(Function &F) override; - bool doFinalization(Module &M) override; - }; + const char *getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; + bool doFinalization(Module &M) override; +}; } INITIALIZE_PASS(GCModuleInfo, "collector-metadata", @@ -48,7 +46,7 @@ INITIALIZE_PASS(GCModuleInfo, "collector-metadata", // ----------------------------------------------------------------------------- GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S) - : F(F), S(S), FrameSize(~0LL) {} + : F(F), S(S), FrameSize(~0LL) {} GCFunctionInfo::~GCFunctionInfo() {} @@ -56,41 +54,19 @@ GCFunctionInfo::~GCFunctionInfo() {} char GCModuleInfo::ID = 0; -GCModuleInfo::GCModuleInfo() - : ImmutablePass(ID) { +GCModuleInfo::GCModuleInfo() : ImmutablePass(ID) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); } -GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, - const std::string &Name) { - strategy_map_type::iterator NMI = StrategyMap.find(Name); - if (NMI != StrategyMap.end()) - return NMI->getValue(); - - for (GCRegistry::iterator I = GCRegistry::begin(), - E = GCRegistry::end(); I != E; ++I) { - if (Name == I->getName()) { - std::unique_ptr<GCStrategy> S = I->instantiate(); - S->Name = Name; - StrategyMap[Name] = S.get(); - StrategyList.push_back(std::move(S)); - return StrategyList.back().get(); - } - } - - dbgs() << "unsupported GC: " << Name << "\n"; - llvm_unreachable(nullptr); -} - 
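The getOrCreateStrategy removed above lives on in GCMetadata.cpp as getGCStrategy, shown a little further down; its iterator-based registry walk becomes a range over GCRegistry::entries(). The lookup half in isolation, with instantiateByName as a hypothetical name:

    static std::unique_ptr<GCStrategy> instantiateByName(StringRef Name) {
      for (auto &Entry : GCRegistry::entries())
        if (Name == Entry.getName())
          return Entry.instantiate();
      return nullptr; // caller reports the unsupported-GC error
    }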
GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!"); assert(F.hasGC()); - + finfo_map_type::iterator I = FInfoMap.find(&F); if (I != FInfoMap.end()) return *I->second; - - GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC()); + + GCStrategy *S = getGCStrategy(F.getGC()); Functions.push_back(make_unique<GCFunctionInfo>(F, *S)); GCFunctionInfo *GFI = Functions.back().get(); FInfoMap[&F] = GFI; @@ -100,8 +76,7 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { void GCModuleInfo::clear() { Functions.clear(); FInfoMap.clear(); - StrategyMap.clear(); - StrategyList.clear(); + GCStrategyList.clear(); } // ----------------------------------------------------------------------------- @@ -112,7 +87,6 @@ FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) { return new Printer(OS); } - const char *Printer::getPassName() const { return "Print Garbage Collector Information"; } @@ -125,42 +99,45 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const { static const char *DescKind(GC::PointKind Kind) { switch (Kind) { - case GC::Loop: return "loop"; - case GC::Return: return "return"; - case GC::PreCall: return "pre-call"; - case GC::PostCall: return "post-call"; + case GC::PreCall: + return "pre-call"; + case GC::PostCall: + return "post-call"; } llvm_unreachable("Invalid point kind"); } bool Printer::runOnFunction(Function &F) { - if (F.hasGC()) return false; - + if (F.hasGC()) + return false; + GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F); - + OS << "GC roots for " << FD->getFunction().getName() << ":\n"; for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(), - RE = FD->roots_end(); RI != RE; ++RI) + RE = FD->roots_end(); + RI != RE; ++RI) OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n"; - + OS << "GC safe points for " << FD->getFunction().getName() << ":\n"; - for (GCFunctionInfo::iterator PI = FD->begin(), - PE = FD->end(); PI != PE; ++PI) { - - OS << "\t" << PI->Label->getName() << ": " - << DescKind(PI->Kind) << ", live = {"; - + for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE; + ++PI) { + + OS << "\t" << PI->Label->getName() << ": " << DescKind(PI->Kind) + << ", live = {"; + for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI), - RE = FD->live_end(PI);;) { + RE = FD->live_end(PI); + ;) { OS << " " << RI->Num; if (++RI == RE) break; OS << ","; } - + OS << " }\n"; } - + return false; } @@ -170,3 +147,31 @@ bool Printer::doFinalization(Module &M) { GMI->clear(); return false; } + +GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) { + // TODO: Arguably, just doing a linear search would be faster for small N + auto NMI = GCStrategyMap.find(Name); + if (NMI != GCStrategyMap.end()) + return NMI->getValue(); + + for (auto& Entry : GCRegistry::entries()) { + if (Name == Entry.getName()) { + std::unique_ptr<GCStrategy> S = Entry.instantiate(); + S->Name = Name; + GCStrategyMap[Name] = S.get(); + GCStrategyList.push_back(std::move(S)); + return GCStrategyList.back().get(); + } + } + + if (GCRegistry::begin() == GCRegistry::end()) { + // In normal operation, the registry should not be empty. There should + // be the builtin GCs if nothing else. The most likely scenario here is + // that we got here without running the initializers used by the Registry + // itself and it's registration mechanism. 
+ const std::string error = ("unsupported GC: " + Name).str() + + " (did you remember to link and initialize the CodeGen library?)"; + report_fatal_error(error); + } else + report_fatal_error(std::string("unsupported GC: ") + Name); +} diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp index fdff4a7..bb8cfa1 100644 --- a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp @@ -14,6 +14,6 @@ #include "llvm/CodeGen/GCMetadataPrinter.h" using namespace llvm; -GCMetadataPrinter::GCMetadataPrinter() { } +GCMetadataPrinter::GCMetadataPrinter() {} -GCMetadataPrinter::~GCMetadataPrinter() { } +GCMetadataPrinter::~GCMetadataPrinter() {} diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp new file mode 100644 index 0000000..d8edd7e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp @@ -0,0 +1,354 @@ +//===-- GCRootLowering.cpp - Garbage collection infrastructure ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the lowering for the gc.root mechanism. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +namespace { + +/// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or +/// llvm.gcwrite intrinsics, replacing them with simple loads and stores as +/// directed by the GCStrategy. It also performs automatic root initialization +/// and custom intrinsic lowering. +class LowerIntrinsics : public FunctionPass { + bool PerformDefaultLowering(Function &F, GCStrategy &Coll); + +public: + static char ID; + + LowerIntrinsics(); + const char *getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; +}; + +/// GCMachineCodeAnalysis - This is a target-independent pass over the machine +/// function representation to identify safe points for the garbage collector +/// in the machine code. It inserts labels at safe points and populates a +/// GCMetadata record for each function. 
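As the LowerIntrinsics comment below says, default lowering turns the barrier intrinsics into plain memory operations (cf. PerformDefaultLowering further down, which constructs a StoreInst directly; this hypothetical helper is merely the IRBuilder equivalent of the write-barrier case):

    static void lowerWriteBarrier(IntrinsicInst *CI) {
      // llvm.gcwrite(%value, %object, %slot)  ==>  store %value, %slot
      IRBuilder<> B(CI);
      B.CreateStore(CI->getArgOperand(0), CI->getArgOperand(2));
      CI->eraseFromParent();
    }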
+class GCMachineCodeAnalysis : public MachineFunctionPass {
+  GCFunctionInfo *FI;
+  MachineModuleInfo *MMI;
+  const TargetInstrInfo *TII;
+
+  void FindSafePoints(MachineFunction &MF);
+  void VisitCallPoint(MachineBasicBlock::iterator MI);
+  MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                        DebugLoc DL) const;
+
+  void FindStackOffsets(MachineFunction &MF);
+
+public:
+  static char ID;
+
+  GCMachineCodeAnalysis();
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+}
+
+// -----------------------------------------------------------------------------
+
+INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
+
+FunctionPass *llvm::createGCLoweringPass() { return new LowerIntrinsics(); }
+
+char LowerIntrinsics::ID = 0;
+
+LowerIntrinsics::LowerIntrinsics() : FunctionPass(ID) {
+  initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
+}
+
+const char *LowerIntrinsics::getPassName() const {
+  return "Lower Garbage Collection Instructions";
+}
+
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+  FunctionPass::getAnalysisUsage(AU);
+  AU.addRequired<GCModuleInfo>();
+  AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+static bool NeedsDefaultLoweringPass(const GCStrategy &C) {
+  // Default lowering is necessary only if read or write barriers have a default
+  // action. The default for roots is no action.
+  return !C.customWriteBarrier() || !C.customReadBarrier() ||
+         C.initializeRoots();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
+bool LowerIntrinsics::doInitialization(Module &M) {
+  GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+  assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+    if (!I->isDeclaration() && I->hasGC())
+      MI->getFunctionInfo(*I); // Instantiate the GC strategy.
+
+  return false;
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+static bool CouldBecomeSafePoint(Instruction *I) {
+  // The natural definition of instructions which could introduce safe points
+  // are:
+  //
+  //   - call, invoke (AfterCall, BeforeCall)
+  //   - phis (Loops)
+  //   - invoke, ret, unwind (Exit)
+  //
+  // However, instructions as seemingly innocuous as arithmetic can become
+  // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+  // it is necessary to take a conservative approach.
+
+  if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) || isa<StoreInst>(I) ||
+      isa<LoadInst>(I))
+    return false;
+
+  // llvm.gcroot is safe because it doesn't do anything at runtime.
+  if (CallInst *CI = dyn_cast<CallInst>(I))
+    if (Function *F = CI->getCalledFunction())
+      if (Intrinsic::ID IID = F->getIntrinsicID())
+        if (IID == Intrinsic::gcroot)
+          return false;
+
+  return true;
+}
+
+static bool InsertRootInitializers(Function &F, AllocaInst **Roots,
+                                   unsigned Count) {
+  // Scroll past alloca instructions.
+  BasicBlock::iterator IP = F.getEntryBlock().begin();
+  while (isa<AllocaInst>(IP))
+    ++IP;
+
+  // Search for initializers in the initial BB.
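CouldBecomeSafePoint above is conservative for a concrete reason: on a 32-bit target a plain udiv on i64 typically lowers to a libcall such as __udivdi3, so a real call, and with it a potential safe point, can appear where the IR showed only arithmetic. The inverse predicate, for emphasis; cannotBecomeCall is a hypothetical name and the llvm.gcroot special case is left out:

    // Only these instruction kinds are guaranteed to survive lowering
    // without turning into a call; everything else may become a safe point.
    static bool cannotBecomeCall(const Instruction *I) {
      return isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
             isa<LoadInst>(I) || isa<StoreInst>(I);
    }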
+ SmallPtrSet<AllocaInst *, 16> InitedRoots; + for (; !CouldBecomeSafePoint(IP); ++IP) + if (StoreInst *SI = dyn_cast<StoreInst>(IP)) + if (AllocaInst *AI = + dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts())) + InitedRoots.insert(AI); + + // Add root initializers. + bool MadeChange = false; + + for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) + if (!InitedRoots.count(*I)) { + StoreInst *SI = new StoreInst( + ConstantPointerNull::get(cast<PointerType>( + cast<PointerType>((*I)->getType())->getElementType())), + *I); + SI->insertAfter(*I); + MadeChange = true; + } + + return MadeChange; +} + +/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores. +/// Leave gcroot intrinsics; the code generator needs to see those. +bool LowerIntrinsics::runOnFunction(Function &F) { + // Quick exit for functions that do not use GC. + if (!F.hasGC()) + return false; + + GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F); + GCStrategy &S = FI.getStrategy(); + + bool MadeChange = false; + + if (NeedsDefaultLoweringPass(S)) + MadeChange |= PerformDefaultLowering(F, S); + + return MadeChange; +} + +bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { + bool LowerWr = !S.customWriteBarrier(); + bool LowerRd = !S.customReadBarrier(); + bool InitRoots = S.initializeRoots(); + + SmallVector<AllocaInst *, 32> Roots; + + bool MadeChange = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) { + Function *F = CI->getCalledFunction(); + switch (F->getIntrinsicID()) { + case Intrinsic::gcwrite: + if (LowerWr) { + // Replace a write barrier with a simple store. + Value *St = + new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI); + CI->replaceAllUsesWith(St); + CI->eraseFromParent(); + } + break; + case Intrinsic::gcread: + if (LowerRd) { + // Replace a read barrier with a simple load. + Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); + Ld->takeName(CI); + CI->replaceAllUsesWith(Ld); + CI->eraseFromParent(); + } + break; + case Intrinsic::gcroot: + if (InitRoots) { + // Initialize the GC root, but do not delete the intrinsic. The + // backend needs the intrinsic to flag the stack slot. 
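The initializer that InsertRootInitializers above emits for each uninitialized root, restated with IRBuilder; initRoot is a hypothetical name, and gc.root slots are pointer-typed allocas, hence the cast:

    #include <iterator>

    static void initRoot(AllocaInst *AI) {
      IRBuilder<> B(AI->getParent(), std::next(BasicBlock::iterator(AI)));
      PointerType *ElemTy = cast<PointerType>(AI->getAllocatedType());
      // Null-initialize the slot so the collector never sees stack garbage.
      B.CreateStore(ConstantPointerNull::get(ElemTy), AI);
    }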
+ Roots.push_back( + cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); + } + break; + default: + continue; + } + + MadeChange = true; + } + } + } + + if (Roots.size()) + MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); + + return MadeChange; +} + +// ----------------------------------------------------------------------------- + +char GCMachineCodeAnalysis::ID = 0; +char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID; + +INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis", + "Analyze Machine Code For Garbage Collection", false, false) + +GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {} + +void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired<MachineModuleInfo>(); + AU.addRequired<GCModuleInfo>(); +} + +MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const { + MCSymbol *Label = MBB.getParent()->getContext().createTempSymbol(); + BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); + return Label; +} + +void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { + // Find the return address (next instruction), too, so as to bracket the call + // instruction. + MachineBasicBlock::iterator RAI = CI; + ++RAI; + + if (FI->getStrategy().needsSafePoint(GC::PreCall)) { + MCSymbol *Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc()); + FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc()); + } + + if (FI->getStrategy().needsSafePoint(GC::PostCall)) { + MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc()); + FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc()); + } +} + +void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { + for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE; + ++BBI) + for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end(); + MI != ME; ++MI) + if (MI->isCall()) { + // Do not treat tail or sibling call sites as safe points. This is + // legal since any arguments passed to the callee which live in the + // remnants of the callers frame will be owned and updated by the + // callee if required. + if (MI->isTerminator()) + continue; + VisitCallPoint(MI); + } +} + +void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + assert(TFI && "TargetRegisterInfo not available!"); + + for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(); + RI != FI->roots_end();) { + // If the root references a dead object, no need to keep it. + if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) { + RI = FI->removeStackRoot(RI); + } else { + RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); + ++RI; + } + } +} + +bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { + // Quick exit for functions that do not use GC. + if (!MF.getFunction()->hasGC()) + return false; + + FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction()); + MMI = &getAnalysis<MachineModuleInfo>(); + TII = MF.getSubtarget().getInstrInfo(); + + // Find the size of the stack frame. There may be no correct static frame + // size, we use UINT64_MAX to represent this. 
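FindStackOffsets above filters the root list with the classic erase-or-advance shape: exactly one branch moves the iterator forward, either through the erase's return value or through ++. The same idiom over a standard container; eraseIf is a hypothetical name:

    #include <iterator>

    template <typename Container, typename Pred>
    static void eraseIf(Container &C, Pred P) {
      for (auto It = C.begin(); It != C.end();)
        It = P(*It) ? C.erase(It)     // erase returns the next iterator
                    : std::next(It);  // otherwise just advance
    }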
+ const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + const bool DynamicFrameSize = MFI->hasVarSizedObjects() || + RegInfo->needsStackRealignment(MF); + FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI->getStackSize()); + + // Find all safe points. + if (FI->getStrategy().needsSafePoints()) + FindSafePoints(MF); + + // Find the concrete stack offsets for all roots (stack slots) + FindStackOffsets(MF); + + return false; +} diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp index 05c36fc..554d326 100644 --- a/contrib/llvm/lib/CodeGen/GCStrategy.cpp +++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp @@ -1,4 +1,4 @@ -//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===// +//===-- GCStrategy.cpp - Garbage Collector Description --------------------===// // // The LLVM Compiler Infrastructure // @@ -7,399 +7,16 @@ // //===----------------------------------------------------------------------===// // -// This file implements target- and collector-independent garbage collection -// infrastructure. -// -// GCMachineCodeAnalysis identifies the GC safe points in the machine code. -// Roots are identified in SelectionDAGISel. +// This file implements the policy object GCStrategy which describes the +// behavior of a given garbage collector. // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; -namespace { - - /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or - /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as - /// directed by the GCStrategy. It also performs automatic root initialization - /// and custom intrinsic lowering. - class LowerIntrinsics : public FunctionPass { - static bool NeedsDefaultLoweringPass(const GCStrategy &C); - static bool NeedsCustomLoweringPass(const GCStrategy &C); - static bool CouldBecomeSafePoint(Instruction *I); - bool PerformDefaultLowering(Function &F, GCStrategy &Coll); - static bool InsertRootInitializers(Function &F, - AllocaInst **Roots, unsigned Count); - - public: - static char ID; - - LowerIntrinsics(); - const char *getPassName() const override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; - }; - - - /// GCMachineCodeAnalysis - This is a target-independent pass over the machine - /// function representation to identify safe points for the garbage collector - /// in the machine code. It inserts labels at safe points and populates a - /// GCMetadata record for each function. 
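An aside before the remainder of the old pass boilerplate is removed below: the lowering retained above rewrites gcread/gcwrite barriers into plain loads and stores, and null-initializes the stack roots flagged by llvm.gcroot. A minimal sketch of the producer side, assuming this revision's IRBuilder API (emitGCRoot and ObjPtrTy are illustrative names, and the enclosing function is assumed to have a GC strategy set via F.setGC(...)):

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Emit an entry-block alloca and mark it as a GC root. LowerIntrinsics
// will later store a null pointer into it if no explicit initializer
// precedes a potential safe point.
static AllocaInst *emitGCRoot(IRBuilder<> &B, Module &M, Type *ObjPtrTy) {
  AllocaInst *Root = B.CreateAlloca(ObjPtrTy, nullptr, "root");
  // llvm.gcroot takes an i8** slot and an i8* metadata operand.
  Value *Slot = B.CreateBitCast(Root, B.getInt8PtrTy()->getPointerTo());
  Function *GCRoot = Intrinsic::getDeclaration(&M, Intrinsic::gcroot);
  B.CreateCall(GCRoot, {Slot, ConstantPointerNull::get(B.getInt8PtrTy())});
  return Root;
}

Passing null metadata is the common case; a collector that wants per-root data would pass a pointer to a descriptor constant instead.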
- class GCMachineCodeAnalysis : public MachineFunctionPass { - const TargetMachine *TM; - GCFunctionInfo *FI; - MachineModuleInfo *MMI; - const TargetInstrInfo *TII; - - void FindSafePoints(MachineFunction &MF); - void VisitCallPoint(MachineBasicBlock::iterator MI); - MCSymbol *InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - DebugLoc DL) const; - - void FindStackOffsets(MachineFunction &MF); - - public: - static char ID; - - GCMachineCodeAnalysis(); - void getAnalysisUsage(AnalysisUsage &AU) const override; - - bool runOnMachineFunction(MachineFunction &MF) override; - }; - -} - -// ----------------------------------------------------------------------------- - -GCStrategy::GCStrategy() : - UseStatepoints(false), - NeededSafePoints(0), - CustomReadBarriers(false), - CustomWriteBarriers(false), - CustomRoots(false), - CustomSafePoints(false), - InitRoots(true), - UsesMetadata(false) -{} - -// ----------------------------------------------------------------------------- - -INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", - false, false) -INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) -INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false) - -FunctionPass *llvm::createGCLoweringPass() { - return new LowerIntrinsics(); -} - -char LowerIntrinsics::ID = 0; - -LowerIntrinsics::LowerIntrinsics() - : FunctionPass(ID) { - initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry()); - } - -const char *LowerIntrinsics::getPassName() const { - return "Lower Garbage Collection Instructions"; -} - -void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { - FunctionPass::getAnalysisUsage(AU); - AU.addRequired<GCModuleInfo>(); - AU.addPreserved<DominatorTreeWrapperPass>(); -} - -/// doInitialization - If this module uses the GC intrinsics, find them now. -bool LowerIntrinsics::doInitialization(Module &M) { - // FIXME: This is rather antisocial in the context of a JIT since it performs - // work against the entire module. But this cannot be done at - // runFunction time (initializeCustomLowering likely needs to change - // the module). - GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); - assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?"); - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!I->isDeclaration() && I->hasGC()) - MI->getFunctionInfo(*I); // Instantiate the GC strategy. - - bool MadeChange = false; - for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) - if (NeedsCustomLoweringPass(**I)) - if ((*I)->initializeCustomLowering(M)) - MadeChange = true; - - return MadeChange; -} - -bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, - unsigned Count) { - // Scroll past alloca instructions. - BasicBlock::iterator IP = F.getEntryBlock().begin(); - while (isa<AllocaInst>(IP)) ++IP; - - // Search for initializers in the initial BB. - SmallPtrSet<AllocaInst*,16> InitedRoots; - for (; !CouldBecomeSafePoint(IP); ++IP) - if (StoreInst *SI = dyn_cast<StoreInst>(IP)) - if (AllocaInst *AI = - dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts())) - InitedRoots.insert(AI); - - // Add root initializers. 
- bool MadeChange = false; - - for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) - if (!InitedRoots.count(*I)) { - StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>( - cast<PointerType>((*I)->getType())->getElementType())), - *I); - SI->insertAfter(*I); - MadeChange = true; - } - - return MadeChange; -} - -bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) { - // Default lowering is necessary only if read or write barriers have a default - // action. The default for roots is no action. - return !C.customWriteBarrier() - || !C.customReadBarrier() - || C.initializeRoots(); -} - -bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) { - // Custom lowering is only necessary if enabled for some action. - return C.customWriteBarrier() - || C.customReadBarrier() - || C.customRoots(); -} - -/// CouldBecomeSafePoint - Predicate to conservatively determine whether the -/// instruction could introduce a safe point. -bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) { - // The natural definition of instructions which could introduce safe points - // are: - // - // - call, invoke (AfterCall, BeforeCall) - // - phis (Loops) - // - invoke, ret, unwind (Exit) - // - // However, instructions as seemingly inoccuous as arithmetic can become - // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead - // it is necessary to take a conservative approach. - - if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) || - isa<StoreInst>(I) || isa<LoadInst>(I)) - return false; - - // llvm.gcroot is safe because it doesn't do anything at runtime. - if (CallInst *CI = dyn_cast<CallInst>(I)) - if (Function *F = CI->getCalledFunction()) - if (unsigned IID = F->getIntrinsicID()) - if (IID == Intrinsic::gcroot) - return false; - - return true; -} - -/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores. -/// Leave gcroot intrinsics; the code generator needs to see those. -bool LowerIntrinsics::runOnFunction(Function &F) { - // Quick exit for functions that do not use GC. - if (!F.hasGC()) - return false; - - GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F); - GCStrategy &S = FI.getStrategy(); - - bool MadeChange = false; - - if (NeedsDefaultLoweringPass(S)) - MadeChange |= PerformDefaultLowering(F, S); - - bool UseCustomLoweringPass = NeedsCustomLoweringPass(S); - if (UseCustomLoweringPass) - MadeChange |= S.performCustomLowering(F); - - // Custom lowering may modify the CFG, so dominators must be recomputed. - if (UseCustomLoweringPass) { - if (DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable<DominatorTreeWrapperPass>()) - DTWP->getDomTree().recalculate(F); - } - - return MadeChange; -} - -bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { - bool LowerWr = !S.customWriteBarrier(); - bool LowerRd = !S.customReadBarrier(); - bool InitRoots = S.initializeRoots(); - - SmallVector<AllocaInst*, 32> Roots; - - bool MadeChange = false; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) { - Function *F = CI->getCalledFunction(); - switch (F->getIntrinsicID()) { - case Intrinsic::gcwrite: - if (LowerWr) { - // Replace a write barrier with a simple store. 
- Value *St = new StoreInst(CI->getArgOperand(0), - CI->getArgOperand(2), CI); - CI->replaceAllUsesWith(St); - CI->eraseFromParent(); - } - break; - case Intrinsic::gcread: - if (LowerRd) { - // Replace a read barrier with a simple load. - Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); - Ld->takeName(CI); - CI->replaceAllUsesWith(Ld); - CI->eraseFromParent(); - } - break; - case Intrinsic::gcroot: - if (InitRoots) { - // Initialize the GC root, but do not delete the intrinsic. The - // backend needs the intrinsic to flag the stack slot. - Roots.push_back(cast<AllocaInst>( - CI->getArgOperand(0)->stripPointerCasts())); - } - break; - default: - continue; - } - - MadeChange = true; - } - } - } - - if (Roots.size()) - MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); - - return MadeChange; -} - -// ----------------------------------------------------------------------------- - -char GCMachineCodeAnalysis::ID = 0; -char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID; - -INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis", - "Analyze Machine Code For Garbage Collection", false, false) - -GCMachineCodeAnalysis::GCMachineCodeAnalysis() - : MachineFunctionPass(ID) {} - -void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - MachineFunctionPass::getAnalysisUsage(AU); - AU.setPreservesAll(); - AU.addRequired<MachineModuleInfo>(); - AU.addRequired<GCModuleInfo>(); -} - -MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - DebugLoc DL) const { - MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); - BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); - return Label; -} - -void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { - // Find the return address (next instruction), too, so as to bracket the call - // instruction. - MachineBasicBlock::iterator RAI = CI; - ++RAI; - - if (FI->getStrategy().needsSafePoint(GC::PreCall)) { - MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc()); - FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc()); - } - - if (FI->getStrategy().needsSafePoint(GC::PostCall)) { - MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc()); - FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc()); - } -} - -void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { - for (MachineFunction::iterator BBI = MF.begin(), - BBE = MF.end(); BBI != BBE; ++BBI) - for (MachineBasicBlock::iterator MI = BBI->begin(), - ME = BBI->end(); MI != ME; ++MI) - if (MI->isCall()) - VisitCallPoint(MI); -} - -void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { - const TargetFrameLowering *TFI = TM->getSubtargetImpl()->getFrameLowering(); - assert(TFI && "TargetRegisterInfo not available!"); - - for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(); - RI != FI->roots_end();) { - // If the root references a dead object, no need to keep it. - if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) { - RI = FI->removeStackRoot(RI); - } else { - RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); - ++RI; - } - } -} - -bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { - // Quick exit for functions that do not use GC. 
-  if (!MF.getFunction()->hasGC())
-    return false;
-
-  FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
-  if (!FI->getStrategy().needsSafePoints())
-    return false;
-
-  TM = &MF.getTarget();
-  MMI = &getAnalysis<MachineModuleInfo>();
-  TII = TM->getSubtargetImpl()->getInstrInfo();
-
-  // Find the size of the stack frame.
-  FI->setFrameSize(MF.getFrameInfo()->getStackSize());
-
-  // Find all safe points.
-  if (FI->getStrategy().customSafePoints()) {
-    FI->getStrategy().findCustomSafePoints(*FI, MF);
-  } else {
-    FindSafePoints(MF);
-  }
-
-  // Find the stack offsets for all roots.
-  FindStackOffsets(MF);
-
-  return false;
-}
+GCStrategy::GCStrategy()
+    : UseStatepoints(false), NeededSafePoints(0), CustomReadBarriers(false),
+      CustomWriteBarriers(false), CustomRoots(false), InitRoots(true),
+      UsesMetadata(false) {}
diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
index 8257567..79de175 100644
--- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -49,9 +49,19 @@
 // str r0, [r5], #4
 //
 // note that we saved 2 registers here almost "for free".
+//
+// However, merging globals can have tradeoffs:
+// - it confuses debuggers, tools, and users
+// - it makes linker optimizations less useful (order files, LOHs, ...)
+// - it forces usage of indexed addressing (which isn't necessarily "free")
+// - it can increase register pressure when the uses are disparate enough.
+//
+// We use heuristics to discover the best global grouping we can (cf. cl::opts).
 // ===---------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/Passes.h"
@@ -66,18 +76,31 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
 using namespace llvm;
 
 #define DEBUG_TYPE "global-merge"
 
+// FIXME: This is only useful as a last-resort way to disable the pass.
static cl::opt<bool> EnableGlobalMerge("enable-global-merge", cl::Hidden, - cl::desc("Enable global merge pass"), + cl::desc("Enable the global merge pass"), cl::init(true)); +static cl::opt<bool> GlobalMergeGroupByUse( + "global-merge-group-by-use", cl::Hidden, + cl::desc("Improve global merge pass to look at uses"), cl::init(true)); + +static cl::opt<bool> GlobalMergeIgnoreSingleUse( + "global-merge-ignore-single-use", cl::Hidden, + cl::desc("Improve global merge pass to ignore globals only used alone"), + cl::init(true)); + static cl::opt<bool> EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, cl::desc("Enable global merge pass on constants"), @@ -90,13 +113,24 @@ EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, cl::desc("Enable global merge pass on external linkage"), cl::init(false)); -STATISTIC(NumMerged , "Number of globals merged"); +STATISTIC(NumMerged, "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { const TargetMachine *TM; + const DataLayout *DL; + // FIXME: Infer the maximum possible offset depending on the actual users + // (these max offsets are different for the users inside Thumb or ARM + // functions), see the code that passes in the offset in the ARM backend + // for more information. + unsigned MaxOffset; bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const; + /// \brief Merge everything in \p Globals for which the corresponding bit + /// in \p GlobalSet is set. + bool doMerge(SmallVectorImpl<GlobalVariable *> &Globals, + const BitVector &GlobalSet, Module &M, bool isConst, + unsigned AddrSpace) const; /// \brief Check if the given variable has been identified as must keep /// \pre setMustKeepGlobalVariables must have been called on the Module that @@ -117,8 +151,10 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - explicit GlobalMerge(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM) { + explicit GlobalMerge(const TargetMachine *TM = nullptr, + unsigned MaximalOffset = 0) + : FunctionPass(ID), TM(TM), DL(TM->getDataLayout()), + MaxOffset(MaximalOffset) { initializeGlobalMergePass(*PassRegistry::getPassRegistry()); } @@ -138,48 +174,243 @@ namespace { } // end anonymous namespace char GlobalMerge::ID = 0; -INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables", - false, false) +INITIALIZE_PASS_BEGIN(GlobalMerge, "global-merge", "Merge global variables", + false, false) +INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables", + false, false) bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const { - const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); - const DataLayout *DL = TLI->getDataLayout(); - - // FIXME: Infer the maximum possible offset depending on the actual users - // (these max offsets are different for the users inside Thumb or ARM - // functions) - unsigned MaxOffset = TLI->getMaximalGlobalOffset(); - // FIXME: Find better heuristics std::stable_sort(Globals.begin(), Globals.end(), - [DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { + [this](const GlobalVariable *GV1, const GlobalVariable *GV2) { Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType(); Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType(); return (DL->getTypeAllocSize(Ty1) < DL->getTypeAllocSize(Ty2)); }); + // If we want to just blindly group all globals together, do so. 
+  if (!GlobalMergeGroupByUse) {
+    BitVector AllGlobals(Globals.size());
+    AllGlobals.set();
+    return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
+  }
+
+  // If we want to be smarter, look at all uses of each global, to try to
+  // discover all sets of globals used together, and how many times each of
+  // these sets occurred.
+  //
+  // Keep this reasonably efficient, by having an append-only list of all sets
+  // discovered so far (UsedGlobalSets), and mapping each "together-ness" unit
+  // of code (currently, a Function) to the set of globals seen so far that are
+  // used together in that unit (GlobalUsesByFunction).
+  //
+  // When we look at the Nth global, we know that any new set is either:
+  //  - the singleton set {N}, containing this global only, or
+  //  - the union of {N} and a previously-discovered set, containing some
+  //    combination of the previous N-1 globals.
+  // Using that knowledge, when looking at the Nth global, we can keep:
+  //  - a reference to the singleton set {N} (CurGVOnlySetIdx)
+  //  - a list mapping each previous set to its union with {N} (EncounteredUGS),
+  //    if it actually occurs.
+
+  // We keep track of the sets of globals used together "close enough".
+  struct UsedGlobalSet {
+    UsedGlobalSet(size_t Size) : Globals(Size), UsageCount(1) {}
+    BitVector Globals;
+    unsigned UsageCount;
+  };
+
+  // Each set is unique in UsedGlobalSets.
+  std::vector<UsedGlobalSet> UsedGlobalSets;
+
+  // Avoid repeating the create-global-set pattern.
+  auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
+    UsedGlobalSets.emplace_back(Globals.size());
+    return UsedGlobalSets.back();
+  };
+
+  // The first set is the empty set.
+  CreateGlobalSet().UsageCount = 0;
+
+  // We define "close enough" to be "in the same function".
+  // FIXME: Grouping uses by function is way too aggressive, so we should have
+  // a better metric for distance between uses.
+  // The obvious alternative would be to group by BasicBlock, but that's in
+  // turn too conservative.
+  // Anything in between wouldn't be trivial to compute, so just stick with
+  // per-function grouping.
+
+  // The value type is an index into UsedGlobalSets.
+  // The default (0) conveniently points to the empty set.
+  DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
+
+  // Now, look at each merge-eligible global in turn.
+
+  // Keep track of the sets we already encountered to which we added the
+  // current global.
+  // Each element matches the same-index element in UsedGlobalSets.
+  // This lets us efficiently tell whether a set has already been expanded to
+  // include the current global.
+  std::vector<size_t> EncounteredUGS;
+
+  for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
+    GlobalVariable *GV = Globals[GI];
+
+    // Reset the encountered sets for this global...
+    std::fill(EncounteredUGS.begin(), EncounteredUGS.end(), 0);
+    // ...and grow it in case we created new sets for the previous global.
+    EncounteredUGS.resize(UsedGlobalSets.size());
+
+    // We might need to create a set that only consists of the current global.
+    // Keep track of its index into UsedGlobalSets.
+    size_t CurGVOnlySetIdx = 0;
+
+    // For each global, look at all its Uses.
+    for (auto &U : GV->uses()) {
+      // This Use might be a ConstantExpr. We're interested in Instruction
+      // users, so look through ConstantExpr...
+      Use *UI, *UE;
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
+        UI = &*CE->use_begin();
+        UE = nullptr;
+      } else if (isa<Instruction>(U.getUser())) {
+        UI = &U;
+        UE = UI->getNext();
+      } else {
+        continue;
+      }
+
+      // ...to iterate on all the instruction users of the global.
+      // Note that we iterate on Uses and not on Users to be able to getNext().
+      for (; UI != UE; UI = UI->getNext()) {
+        Instruction *I = dyn_cast<Instruction>(UI->getUser());
+        if (!I)
+          continue;
+
+        Function *ParentFn = I->getParent()->getParent();
+        size_t UGSIdx = GlobalUsesByFunction[ParentFn];
+
+        // If this is the first global the function uses, map it to the set
+        // consisting of this global only.
+        if (!UGSIdx) {
+          // If that set doesn't exist yet, create it.
+          if (!CurGVOnlySetIdx) {
+            CurGVOnlySetIdx = UsedGlobalSets.size();
+            CreateGlobalSet().Globals.set(GI);
+          } else {
+            ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount;
+          }
+
+          GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx;
+          continue;
+        }
+
+        // If we already encountered this function, just increment the counter.
+        if (UsedGlobalSets[UGSIdx].Globals.test(GI)) {
+          ++UsedGlobalSets[UGSIdx].UsageCount;
+          continue;
+        }
+
+        // If not, the previous set wasn't actually used in this function.
+        --UsedGlobalSets[UGSIdx].UsageCount;
+
+        // If we already expanded the previous set to include this global, just
+        // reuse that expanded set.
+        if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) {
+          ++UsedGlobalSets[ExpandedIdx].UsageCount;
+          GlobalUsesByFunction[ParentFn] = ExpandedIdx;
+          continue;
+        }
+
+        // If not, create a new set consisting of the union of the previous set
+        // and this global. Mark it as encountered, so we can reuse it later.
+        GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] =
+            UsedGlobalSets.size();
+
+        UsedGlobalSet &NewUGS = CreateGlobalSet();
+        NewUGS.Globals.set(GI);
+        NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals;
+      }
+    }
+  }
+
+  // We have now found a number of sets of globals used together, and
+  // accumulated the number of times we encountered each set (i.e., the number
+  // of functions that use that exact set of globals).
+  //
+  // Multiply that by the size of the set to give us a crude profitability
+  // metric.
+  std::sort(UsedGlobalSets.begin(), UsedGlobalSets.end(),
+            [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
+              return UGS1.Globals.count() * UGS1.UsageCount <
+                     UGS2.Globals.count() * UGS2.UsageCount;
+            });
+
+  // We can choose to merge all globals together, but ignore globals never used
+  // with another global. This catches the obviously non-profitable cases of
+  // having a single global, but is aggressive enough for any other case.
+  if (GlobalMergeIgnoreSingleUse) {
+    BitVector AllGlobals(Globals.size());
+    for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) {
+      const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1];
+      if (UGS.UsageCount == 0)
+        continue;
+      if (UGS.Globals.count() > 1)
+        AllGlobals |= UGS.Globals;
+    }
+    return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
+  }
+
+  // Starting from the sets with the best (=biggest) profitability, find a
+  // good combination.
+  // The ideal (and expensive) solution can only be found by trying all
+  // combinations, looking for the one with the best profitability.
+  // Don't be smart about it, and just pick the first compatible combination,
+  // starting with the sets with the best profitability.
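To make the crude profitability ordering concrete before the picking loop below, here is a self-contained toy (not LLVM code; std::bitset stands in for BitVector, and the counts are invented):

#include <algorithm>
#include <bitset>
#include <cstdio>
#include <vector>

struct ToyUsedGlobalSet {
  std::bitset<8> Globals; // which globals the set contains
  unsigned UsageCount;    // how many functions use exactly this set
};

int main() {
  std::vector<ToyUsedGlobalSet> Sets = {
      {std::bitset<8>("00000011"), 5},  // {g0,g1} used together in 5 functions
      {std::bitset<8>("00001110"), 2}}; // {g1,g2,g3} used together in 2
  // Same comparison as the std::sort above: ascending size * usage count.
  std::sort(Sets.begin(), Sets.end(),
            [](const ToyUsedGlobalSet &A, const ToyUsedGlobalSet &B) {
              return A.Globals.count() * A.UsageCount <
                     B.Globals.count() * B.UsageCount;
            });
  // {g0,g1} ranks best: 2 globals * 5 uses = 10 beats 3 * 2 = 6.
  std::printf("best score: %zu\n",
              Sets.back().Globals.count() * Sets.back().UsageCount);
}

The greedy loop that follows walks this order from the best set down, skipping any set that overlaps globals already picked.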
+ BitVector PickedGlobals(Globals.size()); + bool Changed = false; + + for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) { + const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1]; + if (UGS.UsageCount == 0) + continue; + if (PickedGlobals.anyCommon(UGS.Globals)) + continue; + PickedGlobals |= UGS.Globals; + // If the set only contains one global, there's no point in merging. + // Ignore the global for inclusion in other sets though, so keep it in + // PickedGlobals. + if (UGS.Globals.count() < 2) + continue; + Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace); + } + + return Changed; +} + +bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable *> &Globals, + const BitVector &GlobalSet, Module &M, bool isConst, + unsigned AddrSpace) const { + Type *Int32Ty = Type::getInt32Ty(M.getContext()); assert(Globals.size() > 1); - // FIXME: This simple solution merges globals all together as maximum as - // possible. However, with this solution it would be hard to remove dead - // global symbols at link-time. An alternative solution could be checking - // global symbols references function by function, and make the symbols - // being referred in the same function merged and we would probably need - // to introduce heuristic algorithm to solve the merge conflict from - // different functions. - for (size_t i = 0, e = Globals.size(); i != e; ) { - size_t j = 0; + DEBUG(dbgs() << " Trying to merge set, starts with #" + << GlobalSet.find_first() << "\n"); + + ssize_t i = GlobalSet.find_first(); + while (i != -1) { + ssize_t j = 0; uint64_t MergedSize = 0; std::vector<Type*> Tys; std::vector<Constant*> Inits; bool HasExternal = false; GlobalVariable *TheFirstExternal = 0; - for (j = i; j != e; ++j) { + for (j = i; j != -1; j = GlobalSet.find_next(j)) { Type *Ty = Globals[j]->getType()->getElementType(); MergedSize += DL->getTypeAllocSize(Ty); if (MergedSize > MaxOffset) { @@ -212,23 +443,23 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, : "_MergedGlobals", nullptr, GlobalVariable::NotThreadLocal, AddrSpace); - for (size_t k = i; k < j; ++k) { + for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k)) { GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); std::string Name = Globals[k]->getName(); Constant *Idx[2] = { ConstantInt::get(Int32Ty, 0), - ConstantInt::get(Int32Ty, k-i) + ConstantInt::get(Int32Ty, idx++) }; - Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx); + Constant *GEP = + ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx); Globals[k]->replaceAllUsesWith(GEP); Globals[k]->eraseFromParent(); if (Linkage != GlobalValue::InternalLinkage) { // Generate a new alias... 
        auto *PTy = cast<PointerType>(GEP->getType());
-        GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
-                            Linkage, Name, GEP, &M);
+        GlobalAlias::create(PTy, Linkage, Name, GEP, &M);
       }
 
       NumMerged++;
@@ -282,9 +513,6 @@ bool GlobalMerge::doInitialization(Module &M) {
   DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
                                                         BSSGlobals;
-  const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
-  const DataLayout *DL = TLI->getDataLayout();
-  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
   bool Changed = false;
   setMustKeepGlobalVariables(M);
@@ -357,6 +585,6 @@ bool GlobalMerge::doFinalization(Module &M) {
   return false;
 }
 
-Pass *llvm::createGlobalMergePass(const TargetMachine *TM) {
-  return new GlobalMerge(TM);
+Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset) {
+  return new GlobalMerge(TM, Offset);
 }
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index e84d25d9..0d59c72 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -24,7 +24,6 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -247,7 +246,7 @@ namespace {
         return true;
       else if (Incr1 == Incr2) {
         // Favors subsumption.
-        if (C1->NeedSubsumption == false && C2->NeedSubsumption == true)
+        if (!C1->NeedSubsumption && C2->NeedSubsumption)
           return true;
         else if (C1->NeedSubsumption == C2->NeedSubsumption) {
           // Favors diamond over triangle, etc.
@@ -271,15 +270,13 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
 
 bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
-  TLI = MF.getSubtarget().getTargetLowering();
-  TII = MF.getSubtarget().getInstrInfo();
-  TRI = MF.getSubtarget().getRegisterInfo();
+  const TargetSubtargetInfo &ST = MF.getSubtarget();
+  TLI = ST.getTargetLowering();
+  TII = ST.getInstrInfo();
+  TRI = ST.getRegisterInfo();
   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   MRI = &MF.getRegInfo();
-
-  const TargetSubtargetInfo &ST =
-      MF.getTarget().getSubtarget<TargetSubtargetInfo>();
   SchedModel.init(ST.getSchedModel(), &ST, TII);
 
   if (!TII) return false;
@@ -290,7 +287,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
   if (!PreRegAlloc) {
     // Tail merging tends to expose more if-conversion opportunities.
     BranchFolder BF(true, false, *MBFI, *MBPI);
-    BFChange = BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
+    BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(),
                                    getAnalysisIfAvailable<MachineModuleInfo>());
   }
 
@@ -728,6 +725,12 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
   if (BBI.IsDone || BBI.IsUnpredicable)
     return false;
 
+  // If it is already predicated but we couldn't analyze its terminator, the
+  // latter might fall through, but we can't determine where to.
+  // Conservatively avoid if-converting again.
+  if (BBI.Predicate.size() && !BBI.IsBrAnalyzable)
+    return false;
+
   // If it is already predicated, check if the new predicate subsumes
   // its predicate.
if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate)) @@ -971,26 +974,37 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) { /// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all /// values defined in MI which are not live/used by MI. static void UpdatePredRedefs(MachineInstr *MI, LivePhysRegs &Redefs) { - for (ConstMIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { - if (!Ops->isReg() || !Ops->isKill()) - continue; - unsigned Reg = Ops->getReg(); - if (Reg == 0) - continue; - Redefs.removeReg(Reg); - } - for (MIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { - if (!Ops->isReg() || !Ops->isDef()) - continue; - unsigned Reg = Ops->getReg(); - if (Reg == 0 || Redefs.contains(Reg)) + SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers; + Redefs.stepForward(*MI, Clobbers); + + // Now add the implicit uses for each of the clobbered values. + for (auto Reg : Clobbers) { + // FIXME: Const cast here is nasty, but better than making StepForward + // take a mutable instruction instead of const. + MachineOperand &Op = const_cast<MachineOperand&>(*Reg.second); + MachineInstr *OpMI = Op.getParent(); + MachineInstrBuilder MIB(*OpMI->getParent()->getParent(), OpMI); + if (Op.isRegMask()) { + // First handle regmasks. They clobber any entries in the mask which + // means that we need a def for those registers. + MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef); + + // We also need to add an implicit def of this register for the later + // use to read from. + // For the register allocator to have allocated a register clobbered + // by the call which is used later, it must be the case that + // the call doesn't return. + MIB.addReg(Reg.first, RegState::Implicit | RegState::Define); continue; - Redefs.addReg(Reg); - - MachineOperand &Op = *Ops; - MachineInstr *MI = Op.getParent(); - MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); - MIB.addReg(Reg, RegState::Implicit | RegState::Undef); + } + assert(Op.isReg() && "Register operand required"); + if (Op.isDead()) { + // If we found a dead def, but it needs to be live, then remove the dead + // flag. + if (Redefs.contains(Op.getReg())) + Op.setIsDead(false); + } + MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef); } } @@ -1370,7 +1384,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E; ++I) { - Redefs.stepForward(*I); + SmallVector<std::pair<unsigned, const MachineOperand*>, 4> IgnoredClobbers; + Redefs.stepForward(*I, IgnoredClobbers); } BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); @@ -1504,10 +1519,9 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } static bool MaySpeculate(const MachineInstr *MI, - SmallSet<unsigned, 4> &LaterRedefs, - const TargetInstrInfo *TII) { + SmallSet<unsigned, 4> &LaterRedefs) { bool SawStore = true; - if (!MI->isSafeToMove(TII, nullptr, SawStore)) + if (!MI->isSafeToMove(nullptr, SawStore)) return false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -1538,7 +1552,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, // It may be possible not to predicate an instruction if it's the 'true' // side of a diamond and the 'false' side may re-define the instruction's // defs. 
- if (MaySpec && MaySpeculate(I, *LaterRedefs, TII)) { + if (MaySpec && MaySpeculate(I, *LaterRedefs)) { AnyUnpred = true; continue; } @@ -1557,7 +1571,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, UpdatePredRedefs(I, Redefs); } - std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); + BBI.Predicate.append(Cond.begin(), Cond.end()); BBI.IsAnalyzed = false; BBI.NonPredSize = 0; @@ -1622,9 +1636,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, } } - std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), - std::back_inserter(ToBBI.Predicate)); - std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate)); + ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end()); + ToBBI.Predicate.append(Cond.begin(), Cond.end()); ToBBI.ClobbersPred |= FromBBI.ClobbersPred; ToBBI.IsAnalyzed = false; @@ -1663,8 +1676,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { if (NBB && !FromBBI.BB->isSuccessor(NBB)) FromBBI.BB->addSuccessor(NBB); - std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), - std::back_inserter(ToBBI.Predicate)); + ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end()); FromBBI.Predicate.clear(); ToBBI.NonPredSize += FromBBI.NonPredSize; diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index df889f7..9989f23 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -135,7 +135,7 @@ private: // Dead defs generated during spilling. SmallVector<MachineInstr*, 8> DeadDefs; - ~InlineSpiller() {} + ~InlineSpiller() override {} public: InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) @@ -576,8 +576,8 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, std::tie(SVI, Inserted) = SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI))); // Add all the PHIs as dependents of NonPHI. - for (unsigned pi = 0, pe = PHIs.size(); pi != pe; ++pi) - SVI->second.Deps.push_back(PHIs[pi]); + SVI->second.Deps.insert(SVI->second.Deps.end(), PHIs.begin(), + PHIs.end()); // This is the first time we see NonPHI, add it to the worklist. if (Inserted) WorkList.push_back(std::make_pair(Reg, NonPHI)); @@ -921,7 +921,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, // Replace operands for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(Ops[i].second); + MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { MO.setReg(NewVReg); MO.setIsKill(); @@ -1100,6 +1100,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, SmallVector<unsigned, 8> FoldOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { unsigned Idx = Ops[i].second; + assert(MI == Ops[i].first && "Instruction conflict during operand folding"); MachineOperand &MO = MI->getOperand(Idx); if (MO.isImplicit()) { ImpReg = MO.getReg(); @@ -1142,13 +1143,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, continue; // FoldMI does not define this physreg. Remove the LI segment. 
assert(MO->isDead() && "Cannot fold physreg def"); - for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { - if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) { - SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); - if (VNInfo *VNI = LR->getVNInfoAt(Idx)) - LR->removeValNo(VNI); - } - } + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + LIS.removePhysRegDefAt(Reg, Idx); } LIS.ReplaceMachineInstrInMaps(MI, FoldMI); @@ -1237,6 +1233,8 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { DebugLoc DL = MI->getDebugLoc(); DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); MachineBasicBlock *MBB = MI->getParent(); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE)) .addFrameIndex(StackSlot) .addImm(Offset) diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp index 187e015..fd5749b 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp @@ -21,7 +21,8 @@ using namespace llvm; #define DEBUG_TYPE "regalloc" // Static member used for null interference cursors. -InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference; +const InterferenceCache::BlockInterference + InterferenceCache::Cursor::NoInterference; // Initializes PhysRegEntries (instead of a SmallVector, PhysRegEntries is a // buffer of size NumPhysRegs to speed up alloc/clear for targets with large diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h index 1791afb..6519a80 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.h +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h @@ -170,8 +170,8 @@ public: /// Cursor - The primary query interface for the block interference cache. class Cursor { Entry *CacheEntry; - BlockInterference *Current; - static BlockInterference NoInterference; + const BlockInterference *Current; + static const BlockInterference NoInterference; void setEntry(Entry *E) { Current = nullptr; diff --git a/contrib/llvm/lib/CodeGen/JumpInstrTables.cpp b/contrib/llvm/lib/CodeGen/JumpInstrTables.cpp deleted file mode 100644 index 75fa261..0000000 --- a/contrib/llvm/lib/CodeGen/JumpInstrTables.cpp +++ /dev/null @@ -1,296 +0,0 @@ -//===-- JumpInstrTables.cpp: Jump-Instruction Tables ----------------------===// -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief An implementation of jump-instruction tables. 
-/// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "jt" - -#include "llvm/CodeGen/JumpInstrTables.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/JumpInstrTableInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <vector> - -using namespace llvm; - -char JumpInstrTables::ID = 0; - -INITIALIZE_PASS_BEGIN(JumpInstrTables, "jump-instr-tables", - "Jump-Instruction Tables", true, true) -INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo); -INITIALIZE_PASS_END(JumpInstrTables, "jump-instr-tables", - "Jump-Instruction Tables", true, true) - -STATISTIC(NumJumpTables, "Number of indirect call tables generated"); -STATISTIC(NumFuncsInJumpTables, "Number of functions in the jump tables"); - -ModulePass *llvm::createJumpInstrTablesPass() { - // The default implementation uses a single table for all functions. - return new JumpInstrTables(JumpTable::Single); -} - -ModulePass *llvm::createJumpInstrTablesPass(JumpTable::JumpTableType JTT) { - return new JumpInstrTables(JTT); -} - -namespace { -static const char jump_func_prefix[] = "__llvm_jump_instr_table_"; -static const char jump_section_prefix[] = ".jump.instr.table.text."; - -// Checks to see if a given CallSite is making an indirect call, including -// cases where the indirect call is made through a bitcast. -bool isIndirectCall(CallSite &CS) { - if (CS.getCalledFunction()) - return false; - - // Check the value to see if it is merely a bitcast of a function. In - // this case, it will translate to a direct function call in the resulting - // assembly, so we won't treat it as an indirect call here. - const Value *V = CS.getCalledValue(); - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - return !(CE->isCast() && isa<Function>(CE->getOperand(0))); - } - - // Otherwise, since we know it's a call, it must be an indirect call - return true; -} - -// Replaces Functions and GlobalAliases with a different Value. -bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) { - User *Us = U->getUser(); - if (!Us) - return false; - if (Instruction *I = dyn_cast<Instruction>(Us)) { - CallSite CS(I); - - // Don't do the replacement if this use is a direct call to this function. - // If the use is not the called value, then replace it. - if (CS && (isIndirectCall(CS) || CS.isCallee(U))) { - return false; - } - - U->set(V); - } else if (Constant *C = dyn_cast<Constant>(Us)) { - // Don't replace calls to bitcasts of function symbols, since they get - // translated to direct calls. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Us)) { - if (CE->getOpcode() == Instruction::BitCast) { - // This bitcast must have exactly one user. - if (CE->user_begin() != CE->user_end()) { - User *ParentUs = *CE->user_begin(); - if (CallInst *CI = dyn_cast<CallInst>(ParentUs)) { - CallSite CS(CI); - Use &CEU = *CE->use_begin(); - if (CS.isCallee(&CEU)) { - return false; - } - } - } - } - } - - // GlobalAlias doesn't support replaceUsesOfWithOnConstant. And the verifier - // requires alias to point to a defined function. 
So, GlobalAlias is handled - // as a separate case in runOnModule. - if (!isa<GlobalAlias>(C)) - C->replaceUsesOfWithOnConstant(GV, V, U); - } else { - llvm_unreachable("The Use of a Function symbol is neither an instruction " - "nor a constant"); - } - - return true; -} - -// Replaces all replaceable address-taken uses of GV with a pointer to a -// jump-instruction table entry. -void replaceValueWithFunction(GlobalValue *GV, Function *F) { - // Go through all uses of this function and replace the uses of GV with the - // jump-table version of the function. Get the uses as a vector before - // replacing them, since replacing them changes the use list and invalidates - // the iterator otherwise. - for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E;) { - Use &U = *I++; - - // Replacement of constants replaces all instances in the constant. So, some - // uses might have already been handled by the time we reach them here. - if (U.get() == GV) - replaceGlobalValueIndirectUse(GV, F, &U); - } - - return; -} -} // end anonymous namespace - -JumpInstrTables::JumpInstrTables() - : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), - JTType(JumpTable::Single) { - initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry()); -} - -JumpInstrTables::JumpInstrTables(JumpTable::JumpTableType JTT) - : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), JTType(JTT) { - initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry()); -} - -JumpInstrTables::~JumpInstrTables() {} - -void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<JumpInstrTableInfo>(); -} - -Function *JumpInstrTables::insertEntry(Module &M, Function *Target) { - FunctionType *OrigFunTy = Target->getFunctionType(); - FunctionType *FunTy = transformType(JTType, OrigFunTy); - - JumpMap::iterator it = Metadata.find(FunTy); - if (Metadata.end() == it) { - struct TableMeta Meta; - Meta.TableNum = TableCount; - Meta.Count = 0; - Metadata[FunTy] = Meta; - it = Metadata.find(FunTy); - ++NumJumpTables; - ++TableCount; - } - - it->second.Count++; - - std::string NewName(jump_func_prefix); - NewName += (Twine(it->second.TableNum) + "_" + Twine(it->second.Count)).str(); - Function *JumpFun = - Function::Create(OrigFunTy, GlobalValue::ExternalLinkage, NewName, &M); - // The section for this table - JumpFun->setSection((jump_section_prefix + Twine(it->second.TableNum)).str()); - JITI->insertEntry(FunTy, Target, JumpFun); - - ++NumFuncsInJumpTables; - return JumpFun; -} - -bool JumpInstrTables::hasTable(FunctionType *FunTy) { - FunctionType *TransTy = transformType(JTType, FunTy); - return Metadata.end() != Metadata.find(TransTy); -} - -FunctionType *JumpInstrTables::transformType(JumpTable::JumpTableType JTT, - FunctionType *FunTy) { - // Returning nullptr forces all types into the same table, since all types map - // to the same type - Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext()); - - // Ignore the return type. 
- Type *RetTy = VoidPtrTy; - bool IsVarArg = FunTy->isVarArg(); - std::vector<Type *> ParamTys(FunTy->getNumParams()); - FunctionType::param_iterator PI, PE; - int i = 0; - - std::vector<Type *> EmptyParams; - Type *Int32Ty = Type::getInt32Ty(FunTy->getContext()); - FunctionType *VoidFnTy = FunctionType::get( - Type::getVoidTy(FunTy->getContext()), EmptyParams, false); - switch (JTT) { - case JumpTable::Single: - - return FunctionType::get(RetTy, EmptyParams, false); - case JumpTable::Arity: - // Transform all types to void* so that all functions with the same arity - // end up in the same table. - for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE; - PI++, i++) { - ParamTys[i] = VoidPtrTy; - } - - return FunctionType::get(RetTy, ParamTys, IsVarArg); - case JumpTable::Simplified: - // Project all parameters types to one of 3 types: composite, integer, and - // function, matching the three subclasses of Type. - for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE; - ++PI, ++i) { - assert((isa<IntegerType>(*PI) || isa<FunctionType>(*PI) || - isa<CompositeType>(*PI)) && - "This type is not an Integer or a Composite or a Function"); - if (isa<CompositeType>(*PI)) { - ParamTys[i] = VoidPtrTy; - } else if (isa<FunctionType>(*PI)) { - ParamTys[i] = VoidFnTy; - } else if (isa<IntegerType>(*PI)) { - ParamTys[i] = Int32Ty; - } - } - - return FunctionType::get(RetTy, ParamTys, IsVarArg); - case JumpTable::Full: - // Don't transform this type at all. - return FunTy; - } - - return nullptr; -} - -bool JumpInstrTables::runOnModule(Module &M) { - JITI = &getAnalysis<JumpInstrTableInfo>(); - - // Get the set of jumptable-annotated functions that have their address taken. - DenseMap<Function *, Function *> Functions; - for (Function &F : M) { - if (F.hasFnAttribute(Attribute::JumpTable) && F.hasAddressTaken()) { - assert(F.hasUnnamedAddr() && - "Attribute 'jumptable' requires 'unnamed_addr'"); - Functions[&F] = nullptr; - } - } - - // Create the jump-table functions. - for (auto &KV : Functions) { - Function *F = KV.first; - KV.second = insertEntry(M, F); - } - - // GlobalAlias is a special case, because the target of an alias statement - // must be a defined function. So, instead of replacing a given function in - // the alias, we replace all uses of aliases that target jumptable functions. - // Note that there's no need to create these functions, since only aliases - // that target known jumptable functions are replaced, and there's no way to - // put the jumptable annotation on a global alias. - DenseMap<GlobalAlias *, Function *> Aliases; - for (GlobalAlias &GA : M.aliases()) { - Constant *Aliasee = GA.getAliasee(); - if (Function *F = dyn_cast<Function>(Aliasee)) { - auto it = Functions.find(F); - if (it != Functions.end()) { - Aliases[&GA] = it->second; - } - } - } - - // Replace each address taken function with its jump-instruction table entry. 
- for (auto &KV : Functions) - replaceValueWithFunction(KV.first, KV.second); - - for (auto &KV : Aliases) - replaceValueWithFunction(KV.first, KV.second); - - return !Functions.empty(); -} diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 9018314..610c9f4 100644 --- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -12,32 +12,26 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" -#include "llvm/Analysis/JumpInstrTableInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/ForwardControlFlowIntegrity.h" -#include "llvm/CodeGen/JumpInstrTables.h" +#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -49,8 +43,16 @@ EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); void LLVMTargetMachine::initAsmInfo() { - MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo( - *getSubtargetImpl()->getRegisterInfo(), getTargetTriple()); + MRI = TheTarget.createMCRegInfo(getTargetTriple()); + MII = TheTarget.createMCInstrInfo(); + // FIXME: Having an MCSubtargetInfo on the target machine is a hack due + // to some backends having subtarget feature dependent module level + // code generation. This is similar to the hack in the AsmPrinter for + // module level assembly etc. + STI = TheTarget.createMCSubtargetInfo(getTargetTriple(), getTargetCPU(), + getTargetFeatureString()); + + MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*MRI, getTargetTriple()); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and // we'll crash later. 
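The LLVMTargetMachine hunks that follow switch the emission entry points from formatted_raw_ostream to raw_pwrite_stream, so the object writer can go back and patch earlier bytes. For orientation, a hedged sketch of a client of the revised addPassesToEmitFile (emitObject is an illustrative name; raw_fd_ostream is a raw_pwrite_stream at this revision):

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

// Configure the codegen pipeline and write an object file for M.
static bool emitObject(TargetMachine &TM, Module &M, StringRef Path) {
  std::error_code EC;
  raw_fd_ostream Out(Path, EC, sys::fs::F_None); // a raw_pwrite_stream
  if (EC)
    return true;
  legacy::PassManager PM;
  // Returns true if the target does not support object file emission.
  if (TM.addPassesToEmitFile(PM, Out, TargetMachine::CGFT_ObjectFile))
    return true;
  PM.run(M);
  return false;
}

addPassesToEmitMC, revised further below, follows the same pattern but hands back the MCContext so callers can build a custom MCStreamer.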
@@ -68,17 +70,20 @@ void LLVMTargetMachine::initAsmInfo() { AsmInfo = TmpAsmInfo; } -LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, - StringRef CPU, StringRef FS, - TargetOptions Options, +LLVMTargetMachine::LLVMTargetMachine(const Target &T, + StringRef DataLayoutString, + StringRef Triple, StringRef CPU, + StringRef FS, TargetOptions Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : TargetMachine(T, Triple, CPU, FS, Options) { + : TargetMachine(T, DataLayoutString, Triple, CPU, FS, Options) { CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); } -void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) { - PM.add(createBasicTargetTransformInfoPass(this)); +TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(BasicTTIImpl(this, F)); + }); } /// addPassesToX helper drives creation and initialization of TargetPassConfig. @@ -89,7 +94,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, AnalysisID StopAfter) { // Add internal analysis passes from the target machine. - TM->addAnalysisPasses(PM); + PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); // Targets may override createPassConfig to provide a target-specific // subclass. @@ -112,8 +117,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, // Install a MachineModuleInfo class, which is an immutable pass that holds // all the per-module stuff we're generating, including MCContext. MachineModuleInfo *MMI = new MachineModuleInfo( - *TM->getMCAsmInfo(), *TM->getSubtargetImpl()->getRegisterInfo(), - &TM->getSubtargetImpl()->getTargetLowering()->getObjFileLowering()); + *TM->getMCAsmInfo(), *TM->getMCRegisterInfo(), TM->getObjFileLowering()); PM.add(MMI); // Set up a MachineFunction for the rest of CodeGen to work on. @@ -136,22 +140,9 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, return &MMI->getContext(); } -bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - bool DisableVerify, - AnalysisID StartAfter, - AnalysisID StopAfter) { - // Passes to handle jumptable function annotations. These can't be handled at - // JIT time, so we don't add them directly to addPassesToGenerateCode. - PM.add(createJumpInstrTableInfoPass( - getSubtargetImpl()->getInstrInfo()->getJumpInstrTableEntryBound())); - PM.add(createJumpInstrTablesPass(Options.JTType)); - if (Options.FCFI) - PM.add(createForwardControlFlowIntegrityPass( - Options.JTType, Options.CFIType, Options.CFIEnforcing, - Options.getCFIFuncName())); - +bool LLVMTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, + bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { // Add common CodeGen passes. 
MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, StartAfter, StopAfter); @@ -171,27 +162,28 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, if (Options.MCOptions.MCSaveTempLabels) Context->setAllowTemporaryLabels(false); - const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); + const MCSubtargetInfo &STI = *getMCSubtargetInfo(); const MCAsmInfo &MAI = *getMCAsmInfo(); - const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo(); - const MCInstrInfo &MII = *getSubtargetImpl()->getInstrInfo(); + const MCRegisterInfo &MRI = *getMCRegisterInfo(); + const MCInstrInfo &MII = *getMCInstrInfo(); + std::unique_ptr<MCStreamer> AsmStreamer; switch (FileType) { case CGFT_AssemblyFile: { - MCInstPrinter *InstPrinter = - getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, - MII, MRI, STI); + MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter( + Triple(getTargetTriple()), MAI.getAssemblerDialect(), MAI, MII, MRI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = nullptr; if (Options.MCOptions.ShowMCEncoding) - MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context); + MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); + auto FOut = llvm::make_unique<formatted_raw_ostream>(Out); MCStreamer *S = getTarget().createAsmStreamer( - *Context, Out, Options.MCOptions.AsmVerbose, + *Context, std::move(FOut), Options.MCOptions.AsmVerbose, Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB, Options.MCOptions.ShowMCInst); AsmStreamer.reset(S); @@ -200,17 +192,19 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, - *Context); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); if (!MCE || !MAB) return true; - AsmStreamer.reset( - getTarget() - .createMCObjectStreamer(getTargetTriple(), *Context, *MAB, Out, MCE, - STI, Options.MCOptions.MCRelaxAll)); + // Don't waste memory on names of temp labels. + Context->setUseNamesOnTempLabels(false); + + Triple T(getTargetTriple()); + AsmStreamer.reset(getTarget().createMCObjectStreamer( + T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + /*DWARFMustBeAtTheEnd*/ true)); break; } case CGFT_Null: @@ -221,13 +215,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, } // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. - FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); + FunctionPass *Printer = + getTarget().createAsmPrinter(*this, std::move(AsmStreamer)); if (!Printer) return true; - // If successful, createAsmPrinter took ownership of AsmStreamer. - AsmStreamer.release(); - PM.add(Printer); return false; @@ -238,9 +230,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, /// code is not supported. It fills the MCContext Ctx pointer which can be /// used to build custom MCStreamer. /// -bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, - MCContext *&Ctx, - raw_ostream &Out, +bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, + raw_pwrite_stream &Out, bool DisableVerify) { // Add common CodeGen passes. 
Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr); @@ -252,29 +243,26 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, // Create the code emitter for the target if it exists. If not, .o file // emission fails. - const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo(); - const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter( - *getSubtargetImpl()->getInstrInfo(), MRI, STI, *Ctx); + const MCRegisterInfo &MRI = *getMCRegisterInfo(); + MCCodeEmitter *MCE = + getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); if (!MCE || !MAB) return true; - std::unique_ptr<MCStreamer> AsmStreamer; - AsmStreamer.reset(getTarget() - .createMCObjectStreamer(getTargetTriple(), *Ctx, *MAB, - Out, MCE, STI, - Options.MCOptions.MCRelaxAll)); + Triple T(getTargetTriple()); + const MCSubtargetInfo &STI = *getMCSubtargetInfo(); + std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer( + T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + /*DWARFMustBeAtTheEnd*/ true)); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. - FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); + FunctionPass *Printer = + getTarget().createAsmPrinter(*this, std::move(AsmStreamer)); if (!Printer) return true; - // If successful, createAsmPrinter took ownership of AsmStreamer. - AsmStreamer.release(); - PM.add(Printer); return false; // success! diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp index cdf505e..4321849 100644 --- a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp @@ -138,16 +138,3 @@ void LatencyPriorityQueue::remove(SUnit *SU) { std::swap(*I, Queue.back()); Queue.pop_back(); } - -#ifdef NDEBUG -void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {} -#else -void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const { - LatencyPriorityQueue q = *this; - while (!q.empty()) { - SUnit *su = q.pop(); - dbgs() << "Height " << su->getHeight() << ": "; - su->dump(DAG); - } -} -#endif diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp index b621e3b..be61a20 100644 --- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp @@ -59,11 +59,11 @@ void LexicalScopes::extractLexicalScopes( for (const auto &MBB : *MF) { const MachineInstr *RangeBeginMI = nullptr; const MachineInstr *PrevMI = nullptr; - DebugLoc PrevDL; + const DILocation *PrevDL = nullptr; for (const auto &MInsn : MBB) { // Check if instruction has valid location information. - const DebugLoc MIDL = MInsn.getDebugLoc(); - if (MIDL.isUnknown()) { + const DILocation *MIDL = MInsn.getDebugLoc(); + if (!MIDL) { PrevMI = &MInsn; continue; } @@ -96,7 +96,7 @@ void LexicalScopes::extractLexicalScopes( } // Create last instruction range. 
- if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) { + if (RangeBeginMI && PrevMI && PrevDL) { InsnRange R(RangeBeginMI, PrevMI); MIRanges.push_back(R); MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL); @@ -104,30 +104,19 @@ void LexicalScopes::extractLexicalScopes( } } -LexicalScope *LexicalScopes::findInlinedScope(DebugLoc DL) { - MDNode *Scope = nullptr; - MDNode *IA = nullptr; - DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext()); - auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA)); - return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr; -} - /// findLexicalScope - Find lexical scope, either regular or inlined, for the /// given DebugLoc. Return NULL if not found. -LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { - MDNode *Scope = nullptr; - MDNode *IA = nullptr; - DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext()); +LexicalScope *LexicalScopes::findLexicalScope(const DILocation *DL) { + DILocalScope *Scope = DL->getScope(); if (!Scope) return nullptr; // The scope that we were created with could have an extra file - which // isn't what we care about in this case. - DIDescriptor D = DIDescriptor(Scope); - if (D.isLexicalBlockFile()) - Scope = DILexicalBlockFile(Scope).getScope(); + if (auto *File = dyn_cast<DILexicalBlockFile>(Scope)) + Scope = File->getScope(); - if (IA) { + if (auto *IA = DL->getInlinedAt()) { auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA)); return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr; } @@ -136,47 +125,39 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { /// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If /// not available then create new lexical scope. -LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) { - if (DL.isUnknown()) - return nullptr; - MDNode *Scope = nullptr; - MDNode *InlinedAt = nullptr; - DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext()); - - if (InlinedAt) { +LexicalScope *LexicalScopes::getOrCreateLexicalScope(const DILocalScope *Scope, + const DILocation *IA) { + if (IA) { // Create an abstract scope for inlined function. getOrCreateAbstractScope(Scope); // Create an inlined scope for inlined function. - return getOrCreateInlinedScope(Scope, InlinedAt); + return getOrCreateInlinedScope(Scope, IA); } return getOrCreateRegularScope(Scope); } /// getOrCreateRegularScope - Find or create a regular lexical scope. -LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { - DIDescriptor D = DIDescriptor(Scope); - if (D.isLexicalBlockFile()) { - Scope = DILexicalBlockFile(Scope).getScope(); - D = DIDescriptor(Scope); - } +LexicalScope * +LexicalScopes::getOrCreateRegularScope(const DILocalScope *Scope) { + if (auto *File = dyn_cast<DILexicalBlockFile>(Scope)) + Scope = File->getScope(); auto I = LexicalScopeMap.find(Scope); if (I != LexicalScopeMap.end()) return &I->second; + // FIXME: Should the following dyn_cast be DILexicalBlock? LexicalScope *Parent = nullptr; - if (D.isLexicalBlock()) - Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope)); - // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012 - // compatibility is no longer required. 
- I = LexicalScopeMap.emplace(std::piecewise_construct, std::make_tuple(Scope), - std::make_tuple(Parent, DIDescriptor(Scope), - nullptr, false)).first; + if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope)) + Parent = getOrCreateLexicalScope(Block->getScope()); + I = LexicalScopeMap.emplace(std::piecewise_construct, + std::forward_as_tuple(Scope), + std::forward_as_tuple(Parent, Scope, nullptr, + false)).first; if (!Parent) { - assert(DIDescriptor(Scope).isSubprogram()); - assert(DISubprogram(Scope).describes(MF->getFunction())); + assert(cast<DISubprogram>(Scope)->describes(MF->getFunction())); assert(!CurrentFnLexicalScope); CurrentFnLexicalScope = &I->second; } @@ -185,51 +166,49 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { } /// getOrCreateInlinedScope - Find or create an inlined lexical scope. -LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *ScopeNode, - MDNode *InlinedAt) { - std::pair<const MDNode*, const MDNode*> P(ScopeNode, InlinedAt); +LexicalScope * +LexicalScopes::getOrCreateInlinedScope(const DILocalScope *Scope, + const DILocation *InlinedAt) { + std::pair<const DILocalScope *, const DILocation *> P(Scope, InlinedAt); auto I = InlinedLexicalScopeMap.find(P); if (I != InlinedLexicalScopeMap.end()) return &I->second; LexicalScope *Parent; - DILexicalBlock Scope(ScopeNode); - if (Scope.isSubprogram()) - Parent = getOrCreateLexicalScope(DebugLoc::getFromDILocation(InlinedAt)); + if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope)) + Parent = getOrCreateInlinedScope(Block->getScope(), InlinedAt); else - Parent = getOrCreateInlinedScope(Scope.getContext(), InlinedAt); + Parent = getOrCreateLexicalScope(InlinedAt); - // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012 - // compatibility is no longer required. I = InlinedLexicalScopeMap.emplace(std::piecewise_construct, - std::make_tuple(P), - std::make_tuple(Parent, Scope, InlinedAt, - false)).first; + std::forward_as_tuple(P), + std::forward_as_tuple(Parent, Scope, + InlinedAt, false)) + .first; return &I->second; } /// getOrCreateAbstractScope - Find or create an abstract lexical scope. -LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) { - assert(N && "Invalid Scope encoding!"); +LexicalScope * +LexicalScopes::getOrCreateAbstractScope(const DILocalScope *Scope) { + assert(Scope && "Invalid Scope encoding!"); - DIDescriptor Scope(N); - if (Scope.isLexicalBlockFile()) - Scope = DILexicalBlockFile(Scope).getScope(); + if (auto *File = dyn_cast<DILexicalBlockFile>(Scope)) + Scope = File->getScope(); auto I = AbstractScopeMap.find(Scope); if (I != AbstractScopeMap.end()) return &I->second; + // FIXME: Should the following isa be DILexicalBlock? LexicalScope *Parent = nullptr; - if (Scope.isLexicalBlock()) { - DILexicalBlock DB(Scope); - DIDescriptor ParentDesc = DB.getContext(); - Parent = getOrCreateAbstractScope(ParentDesc); - } + if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope)) + Parent = getOrCreateAbstractScope(Block->getScope()); + I = AbstractScopeMap.emplace(std::piecewise_construct, std::forward_as_tuple(Scope), std::forward_as_tuple(Parent, Scope, nullptr, true)).first; - if (Scope.isSubprogram()) + if (isa<DISubprogram>(Scope)) AbstractScopesList.push_back(&I->second); return &I->second; } @@ -290,7 +269,7 @@ void LexicalScopes::assignInstructionRanges( /// have machine instructions that belong to lexical scope identified by /// DebugLoc. 
void LexicalScopes::getMachineBasicBlocks( - DebugLoc DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) { + const DILocation *DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) { MBBs.clear(); LexicalScope *Scope = getOrCreateLexicalScope(DL); if (!Scope) @@ -313,7 +292,7 @@ void LexicalScopes::getMachineBasicBlocks( /// dominates - Return true if DebugLoc's lexical scope dominates at least one /// machine instruction's lexical scope in a given machine basic block. -bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { +bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) { LexicalScope *Scope = getOrCreateLexicalScope(DL); if (!Scope) return false; @@ -325,12 +304,10 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { bool Result = false; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - DebugLoc IDL = I->getDebugLoc(); - if (IDL.isUnknown()) - continue; - if (LexicalScope *IScope = getOrCreateLexicalScope(IDL)) - if (Scope->dominates(IScope)) - return true; + if (const DILocation *IDL = I->getDebugLoc()) + if (LexicalScope *IScope = getOrCreateLexicalScope(IDL)) + if (Scope->dominates(IScope)) + return true; } return Result; } diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index dc936a3..1571551 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -157,10 +158,10 @@ public: UserValue *getNext() const { return next; } /// match - Does this UserValue match the parameters? - bool match(const MDNode *Var, const MDNode *Expr, unsigned Offset, - bool indirect) const { - return Var == Variable && Expr == Expression && Offset == offset && - indirect == IsIndirect; + bool match(const MDNode *Var, const MDNode *Expr, const DILocation *IA, + unsigned Offset, bool indirect) const { + return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA && + Offset == offset && indirect == IsIndirect; } /// merge - Merge equivalence classes. @@ -268,15 +269,9 @@ public: void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, const TargetInstrInfo &TRI); - /// findDebugLoc - Return DebugLoc used for this DBG_VALUE instruction. A - /// variable may have more than one corresponding DBG_VALUE instructions. - /// Only first one needs DebugLoc to identify variable's lexical scope - /// in source file. - DebugLoc findDebugLoc(); - /// getDebugLoc - Return DebugLoc of this UserValue. DebugLoc getDebugLoc() { return dl;} - void print(raw_ostream&, const TargetMachine*); + void print(raw_ostream &, const TargetRegisterInfo *); }; } // namespace @@ -362,10 +357,47 @@ public: }; } // namespace -void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { - DIVariable DV(Variable); +static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS, + const LLVMContext &Ctx) { + if (!DL) + return; + + auto *Scope = cast<DIScope>(DL.getScope()); + // Omit the directory, because it's likely to be long and uninteresting. 
+ CommentOS << Scope->getFilename(); + CommentOS << ':' << DL.getLine(); + if (DL.getCol() != 0) + CommentOS << ':' << DL.getCol(); + + DebugLoc InlinedAtDL = DL.getInlinedAt(); + if (!InlinedAtDL) + return; + + CommentOS << " @[ "; + printDebugLoc(InlinedAtDL, CommentOS, Ctx); + CommentOS << " ]"; +} + +static void printExtendedName(raw_ostream &OS, const DILocalVariable *V, + const DILocation *DL) { + const LLVMContext &Ctx = V->getContext(); + StringRef Res = V->getName(); + if (!Res.empty()) + OS << Res << "," << V->getLine(); + if (auto *InlinedAt = DL->getInlinedAt()) { + if (DebugLoc InlinedAtDL = InlinedAt) { + OS << " @["; + printDebugLoc(InlinedAtDL, OS, Ctx); + OS << "]"; + } + } +} + +void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { + auto *DV = cast<DILocalVariable>(Variable); OS << "!\""; - DV.printExtendedName(OS); + printExtendedName(OS, DV, dl); + OS << "\"\t"; if (offset) OS << '+' << offset; @@ -378,7 +410,7 @@ void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { } for (unsigned i = 0, e = locations.size(); i != e; ++i) { OS << " Loc" << i << '='; - locations[i].print(OS, TM); + locations[i].print(OS, TRI); } OS << '\n'; } @@ -386,7 +418,7 @@ void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { void LDVImpl::print(raw_ostream &OS) { OS << "********** DEBUG VARIABLES **********\n"; for (unsigned i = 0, e = userValues.size(); i != e; ++i) - userValues[i]->print(OS, &MF->getTarget()); + userValues[i]->print(OS, TRI); } void UserValue::coalesceLocation(unsigned LocNo) { @@ -432,7 +464,7 @@ UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr, UserValue *UV = Leader->getLeader(); Leader = UV; for (; UV; UV = UV->getNext()) - if (UV->match(Var, Expr, Offset, IsIndirect)) + if (UV->match(Var, Expr, DL->getInlinedAt(), Offset, IsIndirect)) return UV; } @@ -941,11 +973,6 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, std::next(MachineBasicBlock::iterator(MI)); } -DebugLoc UserValue::findDebugLoc() { - DebugLoc D = dl; - dl = DebugLoc(); - return D; -} void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo, LiveIntervals &LIS, @@ -954,11 +981,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, MachineOperand &Loc = locations[LocNo]; ++NumInsertedDebugValues; + assert(cast<DILocalVariable>(Variable) + ->isValidLocationForIntrinsic(getDebugLoc()) && + "Expected inlined-at fields to agree"); if (Loc.isReg()) - BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), + BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), IsIndirect, Loc.getReg(), offset, Variable, Expression); else - BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) + BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) .addOperand(Loc) .addImm(offset) .addMetadata(Variable) @@ -1004,7 +1034,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { return; const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); for (unsigned i = 0, e = userValues.size(); i != e; ++i) { - DEBUG(userValues[i]->print(dbgs(), &MF->getTarget())); + DEBUG(userValues[i]->print(dbgs(), TRI)); userValues[i]->rewriteLocations(*VRM, *TRI); userValues[i]->emitDebugValues(VRM, *LIS, *TII); } diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h index 9748329..ac2d1a1 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h @@ -33,12 +33,13 @@ class 
VirtRegMap; class LiveDebugVariables : public MachineFunctionPass { void *pImpl; - DenseMap<const Function*, DISubprogram> FunctionDIs; + DenseMap<const Function *, DISubprogram *> FunctionDIs; + public: static char ID; // Pass identification, replacement for typeid LiveDebugVariables(); - ~LiveDebugVariables(); + ~LiveDebugVariables() override; /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx. /// @param OldReg Old virtual register that is going away. diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index 9423edc..d75e441 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -32,6 +32,274 @@ #include <algorithm> using namespace llvm; +namespace { +//===----------------------------------------------------------------------===// +// Implementation of various methods necessary for calculation of live ranges. +// The implementation of the methods abstracts from the concrete type of the +// segment collection. +// +// Implementation of the class follows the Template design pattern. The base +// class contains generic algorithms that call collection-specific methods, +// which are provided in concrete subclasses. In order to avoid virtual calls +// these methods are provided by means of C++ template instantiation. +// The base class calls the methods of the subclass through method impl(), +// which casts 'this' pointer to the type of the subclass. +// +//===----------------------------------------------------------------------===// + +template <typename ImplT, typename IteratorT, typename CollectionT> +class CalcLiveRangeUtilBase { +protected: + LiveRange *LR; + +protected: + CalcLiveRangeUtilBase(LiveRange *LR) : LR(LR) {} + +public: + typedef LiveRange::Segment Segment; + typedef IteratorT iterator; + + VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator) { + assert(!Def.isDead() && "Cannot define a value at the dead slot"); + + iterator I = impl().find(Def); + if (I == segments().end()) { + VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator); + impl().insertAtEnd(Segment(Def, Def.getDeadSlot(), VNI)); + return VNI; + } + + Segment *S = segmentAt(I); + if (SlotIndex::isSameInstr(Def, S->start)) { + assert(S->valno->def == S->start && "Inconsistent existing value def"); + + // It is possible to have both normal and early-clobber defs of the same + // register on an instruction. It doesn't make a lot of sense, but it is + // possible to specify in inline assembly. + // + // Just convert everything to early-clobber. + Def = std::min(Def, S->start); + if (Def != S->start) + S->start = S->valno->def = Def; + return S->valno; + } + assert(SlotIndex::isEarlierInstr(Def, S->start) && "Already live at def"); + VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator); + segments().insert(I, Segment(Def, Def.getDeadSlot(), VNI)); + return VNI; + } + + VNInfo *extendInBlock(SlotIndex StartIdx, SlotIndex Use) { + if (segments().empty()) + return nullptr; + iterator I = + impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr)); + if (I == segments().begin()) + return nullptr; + --I; + if (I->end <= StartIdx) + return nullptr; + if (I->end < Use) + extendSegmentEndTo(I, Use); + return I->valno; + } + + /// This method is used when we want to extend the segment specified + /// by I to end at the specified endpoint. To do this, we should + /// merge and eliminate all segments that this will overlap + /// with. The iterator is not invalidated. 
+ void extendSegmentEndTo(iterator I, SlotIndex NewEnd) { + assert(I != segments().end() && "Not a valid segment!"); + Segment *S = segmentAt(I); + VNInfo *ValNo = I->valno; + + // Search for the first segment that we can't merge with. + iterator MergeTo = std::next(I); + for (; MergeTo != segments().end() && NewEnd >= MergeTo->end; ++MergeTo) + assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); + + // If NewEnd was in the middle of a segment, make sure to get its endpoint. + S->end = std::max(NewEnd, std::prev(MergeTo)->end); + + // If the newly formed segment now touches the segment after it and if they + // have the same value number, merge the two segments into one segment. + if (MergeTo != segments().end() && MergeTo->start <= I->end && + MergeTo->valno == ValNo) { + S->end = MergeTo->end; + ++MergeTo; + } + + // Erase any dead segments. + segments().erase(std::next(I), MergeTo); + } + + /// This method is used when we want to extend the segment specified + /// by I to start at the specified endpoint. To do this, we should + /// merge and eliminate all segments that this will overlap with. + iterator extendSegmentStartTo(iterator I, SlotIndex NewStart) { + assert(I != segments().end() && "Not a valid segment!"); + Segment *S = segmentAt(I); + VNInfo *ValNo = I->valno; + + // Search for the first segment that we can't merge with. + iterator MergeTo = I; + do { + if (MergeTo == segments().begin()) { + S->start = NewStart; + segments().erase(MergeTo, I); + return I; + } + assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); + --MergeTo; + } while (NewStart <= MergeTo->start); + + // If we start in the middle of another segment, just delete a range and + // extend that segment. + if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) { + segmentAt(MergeTo)->end = S->end; + } else { + // Otherwise, extend the segment right after. + ++MergeTo; + Segment *MergeToSeg = segmentAt(MergeTo); + MergeToSeg->start = NewStart; + MergeToSeg->end = S->end; + } + + segments().erase(std::next(MergeTo), std::next(I)); + return MergeTo; + } + + iterator addSegment(Segment S) { + SlotIndex Start = S.start, End = S.end; + iterator I = impl().findInsertPos(S); + + // If the inserted segment starts in the middle or right at the end of + // another segment, just extend that segment to contain the segment of S. + if (I != segments().begin()) { + iterator B = std::prev(I); + if (S.valno == B->valno) { + if (B->start <= Start && B->end >= Start) { + extendSegmentEndTo(B, End); + return B; + } + } else { + // Check to make sure that we are not overlapping two live segments with + // different valno's. + assert(B->end <= Start && + "Cannot overlap two segments with differing ValID's" + " (did you def the same reg twice in a MachineInstr?)"); + } + } + + // Otherwise, if this segment ends in the middle of, or right next + // to, another segment, merge it into that segment. + if (I != segments().end()) { + if (S.valno == I->valno) { + if (I->start <= End) { + I = extendSegmentStartTo(I, Start); + + // If S is a complete superset of a segment, we may need to grow its + // endpoint as well. + if (End > I->end) + extendSegmentEndTo(I, End); + return I; + } + } else { + // Check to make sure that we are not overlapping two live segments with + // different valno's. + assert(I->start >= End && + "Cannot overlap two segments with differing ValID's"); + } + } + + // Otherwise, this is just a new segment that doesn't interact with + // anything. + // Insert it. 
+ return segments().insert(I, S); + } + +private: + ImplT &impl() { return *static_cast<ImplT *>(this); } + + CollectionT &segments() { return impl().segmentsColl(); } + + Segment *segmentAt(iterator I) { return const_cast<Segment *>(&(*I)); } +}; + +//===----------------------------------------------------------------------===// +// Instantiation of the methods for calculation of live ranges +// based on a segment vector. +//===----------------------------------------------------------------------===// + +class CalcLiveRangeUtilVector; +typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilVector, LiveRange::iterator, + LiveRange::Segments> CalcLiveRangeUtilVectorBase; + +class CalcLiveRangeUtilVector : public CalcLiveRangeUtilVectorBase { +public: + CalcLiveRangeUtilVector(LiveRange *LR) : CalcLiveRangeUtilVectorBase(LR) {} + +private: + friend CalcLiveRangeUtilVectorBase; + + LiveRange::Segments &segmentsColl() { return LR->segments; } + + void insertAtEnd(const Segment &S) { LR->segments.push_back(S); } + + iterator find(SlotIndex Pos) { return LR->find(Pos); } + + iterator findInsertPos(Segment S) { + return std::upper_bound(LR->begin(), LR->end(), S.start); + } +}; + +//===----------------------------------------------------------------------===// +// Instantiation of the methods for calculation of live ranges +// based on a segment set. +//===----------------------------------------------------------------------===// + +class CalcLiveRangeUtilSet; +typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilSet, + LiveRange::SegmentSet::iterator, + LiveRange::SegmentSet> CalcLiveRangeUtilSetBase; + +class CalcLiveRangeUtilSet : public CalcLiveRangeUtilSetBase { +public: + CalcLiveRangeUtilSet(LiveRange *LR) : CalcLiveRangeUtilSetBase(LR) {} + +private: + friend CalcLiveRangeUtilSetBase; + + LiveRange::SegmentSet &segmentsColl() { return *LR->segmentSet; } + + void insertAtEnd(const Segment &S) { + LR->segmentSet->insert(LR->segmentSet->end(), S); + } + + iterator find(SlotIndex Pos) { + iterator I = + LR->segmentSet->upper_bound(Segment(Pos, Pos.getNextSlot(), nullptr)); + if (I == LR->segmentSet->begin()) + return I; + iterator PrevI = std::prev(I); + if (Pos < (*PrevI).end) + return PrevI; + return I; + } + + iterator findInsertPos(Segment S) { + iterator I = LR->segmentSet->upper_bound(S); + if (I != LR->segmentSet->end() && !(S.start < *I)) + ++I; + return I; + } +}; +} // namespace + +//===----------------------------------------------------------------------===// +// LiveRange methods +//===----------------------------------------------------------------------===// + LiveRange::iterator LiveRange::find(SlotIndex Pos) { // This algorithm is basically std::upper_bound. // Unfortunately, std::upper_bound cannot be used with mixed types until we @@ -52,30 +320,11 @@ LiveRange::iterator LiveRange::find(SlotIndex Pos) { VNInfo *LiveRange::createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator) { - assert(!Def.isDead() && "Cannot define a value at the dead slot"); - iterator I = find(Def); - if (I == end()) { - VNInfo *VNI = getNextValue(Def, VNInfoAllocator); - segments.push_back(Segment(Def, Def.getDeadSlot(), VNI)); - return VNI; - } - if (SlotIndex::isSameInstr(Def, I->start)) { - assert(I->valno->def == I->start && "Inconsistent existing value def"); - - // It is possible to have both normal and early-clobber defs of the same - // register on an instruction. It doesn't make a lot of sense, but it is - // possible to specify in inline assembly. 
- // - // Just convert everything to early-clobber. - Def = std::min(Def, I->start); - if (Def != I->start) - I->start = I->valno->def = Def; - return I->valno; - } - assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def"); - VNInfo *VNI = getNextValue(Def, VNInfoAllocator); - segments.insert(I, Segment(Def, Def.getDeadSlot(), VNI)); - return VNI; + // Use the segment set, if it is available. + if (segmentSet != nullptr) + return CalcLiveRangeUtilSet(this).createDeadDef(Def, VNInfoAllocator); + // Otherwise use the segment vector. + return CalcLiveRangeUtilVector(this).createDeadDef(Def, VNInfoAllocator); } // overlaps - Return true if the intersection of the two live ranges is @@ -236,68 +485,18 @@ void LiveRange::RenumberValues() { } } -/// This method is used when we want to extend the segment specified by I to end -/// at the specified endpoint. To do this, we should merge and eliminate all -/// segments that this will overlap with. The iterator is not invalidated. -void LiveRange::extendSegmentEndTo(iterator I, SlotIndex NewEnd) { - assert(I != end() && "Not a valid segment!"); - VNInfo *ValNo = I->valno; - - // Search for the first segment that we can't merge with. - iterator MergeTo = std::next(I); - for (; MergeTo != end() && NewEnd >= MergeTo->end; ++MergeTo) { - assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); - } - - // If NewEnd was in the middle of a segment, make sure to get its endpoint. - I->end = std::max(NewEnd, std::prev(MergeTo)->end); - - // If the newly formed segment now touches the segment after it and if they - // have the same value number, merge the two segments into one segment. - if (MergeTo != end() && MergeTo->start <= I->end && - MergeTo->valno == ValNo) { - I->end = MergeTo->end; - ++MergeTo; - } - - // Erase any dead segments. - segments.erase(std::next(I), MergeTo); +void LiveRange::addSegmentToSet(Segment S) { + CalcLiveRangeUtilSet(this).addSegment(S); } - -/// This method is used when we want to extend the segment specified by I to -/// start at the specified endpoint. To do this, we should merge and eliminate -/// all segments that this will overlap with. -LiveRange::iterator -LiveRange::extendSegmentStartTo(iterator I, SlotIndex NewStart) { - assert(I != end() && "Not a valid segment!"); - VNInfo *ValNo = I->valno; - - // Search for the first segment that we can't merge with. - iterator MergeTo = I; - do { - if (MergeTo == begin()) { - I->start = NewStart; - segments.erase(MergeTo, I); - return I; - } - assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); - --MergeTo; - } while (NewStart <= MergeTo->start); - - // If we start in the middle of another segment, just delete a range and - // extend that segment. - if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) { - MergeTo->end = I->end; - } else { - // Otherwise, extend the segment right after. - ++MergeTo; - MergeTo->start = NewStart; - MergeTo->end = I->end; +LiveRange::iterator LiveRange::addSegment(Segment S) { + // Use the segment set, if it is available. + if (segmentSet != nullptr) { + addSegmentToSet(S); + return end(); } - - segments.erase(std::next(MergeTo), std::next(I)); - return MergeTo; + // Otherwise use the segment vector. 
+ return CalcLiveRangeUtilVector(this).addSegment(S); } void LiveRange::append(const Segment S) { @@ -306,69 +505,15 @@ void LiveRange::append(const Segment S) { segments.push_back(S); } -LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) { - SlotIndex Start = S.start, End = S.end; - iterator it = std::upper_bound(From, end(), Start); - - // If the inserted segment starts in the middle or right at the end of - // another segment, just extend that segment to contain the segment of S. - if (it != begin()) { - iterator B = std::prev(it); - if (S.valno == B->valno) { - if (B->start <= Start && B->end >= Start) { - extendSegmentEndTo(B, End); - return B; - } - } else { - // Check to make sure that we are not overlapping two live segments with - // different valno's. - assert(B->end <= Start && - "Cannot overlap two segments with differing ValID's" - " (did you def the same reg twice in a MachineInstr?)"); - } - } - - // Otherwise, if this segment ends in the middle of, or right next to, another - // segment, merge it into that segment. - if (it != end()) { - if (S.valno == it->valno) { - if (it->start <= End) { - it = extendSegmentStartTo(it, Start); - - // If S is a complete superset of a segment, we may need to grow its - // endpoint as well. - if (End > it->end) - extendSegmentEndTo(it, End); - return it; - } - } else { - // Check to make sure that we are not overlapping two live segments with - // different valno's. - assert(it->start >= End && - "Cannot overlap two segments with differing ValID's"); - } - } - - // Otherwise, this is just a new segment that doesn't interact with anything. - // Insert it. - return segments.insert(it, S); -} - /// extendInBlock - If this range is live before Kill in the basic /// block that starts at StartIdx, extend it to be live up to Kill and return /// the value. If there is no live range before Kill, return NULL. VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { - if (empty()) - return nullptr; - iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot()); - if (I == begin()) - return nullptr; - --I; - if (I->end <= StartIdx) - return nullptr; - if (I->end < Kill) - extendSegmentEndTo(I, Kill); - return I->valno; + // Use the segment set, if it is available. + if (segmentSet != nullptr) + return CalcLiveRangeUtilSet(this).extendInBlock(StartIdx, Kill); + // Otherwise use the segment vector. + return CalcLiveRangeUtilVector(this).extendInBlock(StartIdx, Kill); } /// Remove the specified segment from this range. Note that the segment must @@ -424,13 +569,9 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End, /// Also remove the value# from value# list. void LiveRange::removeValNo(VNInfo *ValNo) { if (empty()) return; - iterator I = end(); - iterator E = begin(); - do { - --I; - if (I->valno == ValNo) - segments.erase(I); - } while (I != E); + segments.erase(std::remove_if(begin(), end(), [ValNo](const Segment &S) { + return S.valno == ValNo; + }), end()); // Now that ValNo is dead, remove it. 
 markValNoForDeletion(ValNo);
 }
@@ -598,6 +739,21 @@ VNInfo *LiveRange::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
   return V2;
 }
 
+void LiveRange::flushSegmentSet() {
+  assert(segmentSet != nullptr && "segment set must have been created");
+  assert(
+      segments.empty() &&
+      "segment set can be used only initially before switching to the array");
+  segments.append(segmentSet->begin(), segmentSet->end());
+  segmentSet = nullptr;
+  verify();
+}
+
+void LiveInterval::freeSubRange(SubRange *S) {
+  S->~SubRange();
+  // Memory was allocated with BumpPtr allocator and is not freed here.
+}
+
 void LiveInterval::removeEmptySubRanges() {
   SubRange **NextPtr = &SubRanges;
   SubRange *I = *NextPtr;
@@ -609,12 +765,22 @@ void LiveInterval::removeEmptySubRanges() {
     }
     // Skip empty subranges until we find the first nonempty one.
     do {
-      I = I->Next;
+      SubRange *Next = I->Next;
+      freeSubRange(I);
+      I = Next;
     } while (I != nullptr && I->empty());
     *NextPtr = I;
   }
 }
 
+void LiveInterval::clearSubRanges() {
+  for (SubRange *I = SubRanges, *Next; I != nullptr; I = Next) {
+    Next = I->Next;
+    freeSubRange(I);
+  }
+  SubRanges = nullptr;
+}
+
 /// Helper function for constructMainRangeFromSubranges(): Search the CFG
 /// backwards until we find a place covered by a LiveRange segment that actually
 /// has a valno set.
@@ -650,23 +816,45 @@ static VNInfo *searchForVNI(const SlotIndexes &Indexes, LiveRange &LR,
 static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) {
   SmallPtrSet<const MachineBasicBlock*, 5> Visited;
-  for (LiveRange::Segment &S : LI.segments) {
-    if (S.valno != nullptr)
-      continue;
-    // This can only happen at the begin of a basic block.
-    assert(S.start.isBlock() && "valno should only be missing at block begin");
-
-    Visited.clear();
-    const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
-    for (const MachineBasicBlock *Pred : MBB->predecessors()) {
-      VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
-      if (VNI != nullptr) {
-        S.valno = VNI;
-        break;
+
+  LiveRange::iterator OutIt;
+  VNInfo *PrevValNo = nullptr;
+  for (LiveRange::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+    LiveRange::Segment &S = *I;
+    // Determine final VNI if necessary.
+    if (S.valno == nullptr) {
+      // This can only happen at the begin of a basic block.
+      assert(S.start.isBlock() && "valno should only be missing at block begin");
+
+      Visited.clear();
+      const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
+      for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+        VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
+        if (VNI != nullptr) {
+          S.valno = VNI;
+          break;
+        }
       }
+      assert(S.valno != nullptr && "could not determine valno");
+    }
+    // Merge with previous segment if it has the same VNI.
+    if (PrevValNo == S.valno && OutIt->end == S.start) {
+      OutIt->end = S.end;
+    } else {
+      // Didn't merge. Move OutIt to next segment.
+      if (PrevValNo == nullptr)
+        OutIt = LI.begin();
+      else
+        ++OutIt;
+
+      if (OutIt != I)
+        *OutIt = *I;
+      PrevValNo = S.valno;
     }
-    assert(S.valno != nullptr && "could not determine valno");
   }
+  // If we merged some segments, chop off the end.
+  ++OutIt;
+  LI.segments.erase(OutIt, LI.end());
 }
 
 void LiveInterval::constructMainRangeFromSubranges(
@@ -789,6 +977,12 @@ void LiveInterval::constructMainRangeFromSubranges(
         NeedVNIFixup = true;
       }
 
+      // In rare cases we can produce adjacent segments with the same value
+      // number (if they come from different subranges, but happen to have
+      // the same defining instruction). VNIFixup will fix those cases.
+      if (!empty() && segments.back().end == Pos &&
+          segments.back().valno == VNI)
+        NeedVNIFixup = true;
       CurrentSegment.start = Pos;
       CurrentSegment.valno = VNI;
       ConstructingSegment = true;
@@ -997,6 +1191,13 @@ static inline bool coalescable(const LiveRange::Segment &A,
 
 void LiveRangeUpdater::add(LiveRange::Segment Seg) {
   assert(LR && "Cannot add to a null destination");
 
+  // Fall back to the regular add method if the live range
+  // is using the segment set instead of the segment vector.
+  if (LR->segmentSet != nullptr) {
+    LR->addSegmentToSet(Seg);
+    return;
+  }
+
   // Flush the state if Start moves backwards.
   if (!LastStart.isValid() || LastStart > Seg.start) {
     if (isDirty())
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
index 56f38b6..adca4cc 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -67,6 +67,13 @@ static cl::opt<bool> EnableSubRegLiveness(
     "enable-subreg-liveness", cl::Hidden, cl::init(true),
     cl::desc("Enable subregister liveness tracking."));
 
+namespace llvm {
+cl::opt<bool> UseSegmentSetForPhysRegs(
+    "use-segment-set-for-physregs", cl::Hidden, cl::init(true),
+    cl::desc(
+        "Use segment set for the computation of the live ranges of physregs."));
+}
+
 void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
   AU.addRequired<AliasAnalysis>();
@@ -192,7 +199,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
   assert(LRCalc && "LRCalc not initialized.");
   assert(LI.empty() && "Should only compute empty intervals.");
   LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
-  LRCalc->calculate(LI);
+  LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
   computeDeadValues(LI, nullptr);
 }
 
@@ -268,6 +275,10 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
       LRCalc->extendToUses(LR, Reg);
     }
   }
+
+  // Flush the segment set to the segment vector.
+  if (UseSegmentSetForPhysRegs)
+    LR.flushSegmentSet();
 }
 
 
@@ -300,7 +311,8 @@ void LiveIntervals::computeLiveInRegUnits() {
       unsigned Unit = *Units;
       LiveRange *LR = RegUnitRanges[Unit];
       if (!LR) {
-        LR = RegUnitRanges[Unit] = new LiveRange();
+        // Use segment set to speed up initial computation of the live range.
+        LR = RegUnitRanges[Unit] = new LiveRange(UseSegmentSetForPhysRegs);
         NewRanges.push_back(Unit);
       }
       VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator());
@@ -448,23 +460,34 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
   for (auto VNI : LI.valnos) {
     if (VNI->isUnused())
       continue;
-    LiveRange::iterator I = LI.FindSegmentContaining(VNI->def);
+    SlotIndex Def = VNI->def;
+    LiveRange::iterator I = LI.FindSegmentContaining(Def);
     assert(I != LI.end() && "Missing segment for VNI");
-    if (I->end != VNI->def.getDeadSlot())
+
+    // Is the register live before? Otherwise we may have to add a read-undef
+    // flag for subregister defs.
+    if (MRI->shouldTrackSubRegLiveness(LI.reg)) {
+      if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
+        MachineInstr *MI = getInstructionFromIndex(Def);
+        MI->addRegisterDefReadUndef(LI.reg);
+      }
+    }
+
+    if (I->end != Def.getDeadSlot())
       continue;
     if (VNI->isPHIDef()) {
       // This is a dead PHI. Remove it.
       VNI->markUnused();
       LI.removeSegment(I);
-      DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
+      DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n");
       PHIRemoved = true;
     } else {
       // This is a dead def.
Make sure the instruction knows. - MachineInstr *MI = getInstructionFromIndex(VNI->def); + MachineInstr *MI = getInstructionFromIndex(Def); assert(MI && "No instruction defining live value"); MI->addRegisterDead(LI.reg, TRI); if (dead && MI->allDefsAreDead()) { - DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI); + DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI); dead->push_back(MI); } } @@ -609,15 +632,6 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, } } -void LiveIntervals::pruneValue(LiveInterval &LI, SlotIndex Kill, - SmallVectorImpl<SlotIndex> *EndPoints) { - pruneValue((LiveRange&)LI, Kill, EndPoints); - - for (LiveInterval::SubRange &SR : LI.subranges()) { - pruneValue(SR, Kill, nullptr); - } -} - //===----------------------------------------------------------------------===// // Register allocator hooks. // @@ -648,7 +662,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end))); } - if (MRI->tracksSubRegLiveness()) { + if (MRI->subRegLivenessEnabled()) { SRs.clear(); for (const LiveInterval::SubRange &SR : LI.subranges()) { SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end))); @@ -686,7 +700,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { goto CancelKill; } - if (MRI->tracksSubRegLiveness()) { + if (MRI->subRegLivenessEnabled()) { // When reading a partial undefined value we must not add a kill flag. // The regalloc might have used the undef lane for something else. // Example: @@ -1376,3 +1390,25 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, repairOldRegInRange(Begin, End, endIdx, LI, Reg); } } + +void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (LiveRange *LR = getCachedRegUnit(*Units)) + if (VNInfo *VNI = LR->getVNInfoAt(Pos)) + LR->removeValNo(VNI); + } +} + +void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) { + VNInfo *VNI = LI.getVNInfoAt(Pos); + if (VNI == nullptr) + return; + LI.removeValNo(VNI); + + // Also remove the value in subranges. + for (LiveInterval::SubRange &S : LI.subranges()) { + if (VNInfo *SVNI = S.getVNInfoAt(Pos)) + S.removeValNo(SVNI); + } + LI.removeEmptySubRanges(); +} diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp index 7efd941..eef7643 100644 --- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -16,17 +16,23 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; /// \brief Remove all registers from the set that get clobbered by the register /// mask. -void LivePhysRegs::removeRegsInMask(const MachineOperand &MO) { +/// The clobbers set will be the list of live registers clobbered +/// by the regmask. 
+void LivePhysRegs::removeRegsInMask(const MachineOperand &MO,
+        SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> *Clobbers) {
   SparseSet<unsigned>::iterator LRI = LiveRegs.begin();
   while (LRI != LiveRegs.end()) {
-    if (MO.clobbersPhysReg(*LRI))
+    if (MO.clobbersPhysReg(*LRI)) {
+      if (Clobbers)
+        Clobbers->push_back(std::make_pair(*LRI, &MO));
       LRI = LiveRegs.erase(LRI);
-    else
+    } else
       ++LRI;
   }
 }
@@ -44,7 +50,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
       continue;
       removeReg(Reg);
     } else if (O->isRegMask())
-      removeRegsInMask(*O);
+      removeRegsInMask(*O, nullptr);
   }
 
   // Add uses to the set.
@@ -62,8 +68,8 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
 /// killed-uses, add defs. This is not the recommended way, because it depends
 /// on accurate kill flags. If possible, use stepBackward() instead of this
 /// function.
-void LivePhysRegs::stepForward(const MachineInstr &MI) {
-  SmallVector<unsigned, 4> Defs;
+void LivePhysRegs::stepForward(const MachineInstr &MI,
+    SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) {
   // Remove killed registers from the set.
   for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) {
     if (O->isReg()) {
@@ -71,8 +77,9 @@
       if (Reg == 0)
         continue;
       if (O->isDef()) {
-        if (!O->isDead())
-          Defs.push_back(Reg);
+        // Note, dead defs are still recorded. The caller should decide how to
+        // handle them.
+        Clobbers.push_back(std::make_pair(Reg, &*O));
       } else {
         if (!O->isKill())
           continue;
@@ -80,12 +87,16 @@
         removeReg(Reg);
       }
     } else if (O->isRegMask())
-      removeRegsInMask(*O);
+      removeRegsInMask(*O, &Clobbers);
   }
 
   // Add defs to the set.
-  for (unsigned i = 0, e = Defs.size(); i != e; ++i)
-    addReg(Defs[i]);
+  for (auto Reg : Clobbers) {
+    // Skip dead defs. They shouldn't be added to the set.
+    if (Reg.second->isReg() && Reg.second->isDead())
+      continue;
+    addReg(Reg.first);
+  }
 }
 
 /// Print the currently live registers to OS.
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
index 1d46161..bb2877a 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -43,17 +43,14 @@ void LiveRangeCalc::reset(const MachineFunction *mf,
 static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc,
                           LiveRange &LR, const MachineOperand &MO) {
   const MachineInstr *MI = MO.getParent();
-  SlotIndex DefIdx;
-  if (MI->isPHI())
-    DefIdx = Indexes.getMBBStartIdx(MI->getParent());
-  else
-    DefIdx = Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber());
+  SlotIndex DefIdx =
+      Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber());
 
   // Create the def in LR. This may find an existing def.
   LR.createDeadDef(DefIdx, Alloc);
 }
 
-void LiveRangeCalc::calculate(LiveInterval &LI) {
+void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
   assert(MRI && Indexes && "call reset() first");
 
   // Step 1: Create minimal live segments for every definition of Reg.
@@ -66,7 +63,7 @@ void LiveRangeCalc::calculate(LiveInterval &LI) {
       continue;
 
     unsigned SubReg = MO.getSubReg();
-    if (LI.hasSubRanges() || (SubReg != 0 && MRI->tracksSubRegLiveness())) {
+    if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) {
       unsigned Mask = SubReg != 0 ?
TRI.getSubRegIndexLaneMask(SubReg) : MRI->getMaxLaneMaskForVReg(Reg); @@ -222,23 +219,23 @@ void LiveRangeCalc::updateFromLiveIns() { } -void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg) { - assert(Kill.isValid() && "Invalid SlotIndex"); +void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) { + assert(Use.isValid() && "Invalid SlotIndex"); assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); - MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot()); - assert(KillMBB && "No MBB at Kill"); + MachineBasicBlock *UseMBB = Indexes->getMBBFromIndex(Use.getPrevSlot()); + assert(UseMBB && "No MBB at Use"); // Is there a def in the same MBB we can extend? - if (LR.extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill)) + if (LR.extendInBlock(Indexes->getMBBStartIdx(UseMBB), Use)) return; - // Find the single reaching def, or determine if Kill is jointly dominated by + // Find the single reaching def, or determine if Use is jointly dominated by // multiple values, and we may need to create even more phi-defs to preserve // VNInfo SSA form. Perform a search for all predecessor blocks where we // know the dominating VNInfo. - if (findReachingDefs(LR, *KillMBB, Kill, PhysReg)) + if (findReachingDefs(LR, *UseMBB, Use, PhysReg)) return; // When there were multiple different values, we may need new PHIs. @@ -257,12 +254,12 @@ void LiveRangeCalc::calculateValues() { } -bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, - SlotIndex Kill, unsigned PhysReg) { - unsigned KillMBBNum = KillMBB.getNumber(); +bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, + SlotIndex Use, unsigned PhysReg) { + unsigned UseMBBNum = UseMBB.getNumber(); // Block numbers where LR should be live-in. - SmallVector<unsigned, 16> WorkList(1, KillMBBNum); + SmallVector<unsigned, 16> WorkList(1, UseMBBNum); // Remember if we have seen more than one value. bool UniqueVNI = true; @@ -275,13 +272,19 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, #ifndef NDEBUG if (MBB->pred_empty()) { MBB->getParent()->verify(); + errs() << "Use of " << PrintReg(PhysReg) + << " does not have a corresponding definition on every path:\n"; + const MachineInstr *MI = Indexes->getInstructionFromIndex(Use); + if (MI != nullptr) + errs() << Use << " " << *MI; llvm_unreachable("Use not jointly dominated by defs."); } if (TargetRegisterInfo::isPhysicalRegister(PhysReg) && !MBB->isLiveIn(PhysReg)) { MBB->getParent()->verify(); - errs() << "The register needs to be live in to BB#" << MBB->getNumber() + errs() << "The register " << PrintReg(PhysReg) + << " needs to be live in to BB#" << MBB->getNumber() << ", but is missing from the live-in list.\n"; llvm_unreachable("Invalid global physical register"); } @@ -316,11 +319,11 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, } // No, we need a live-in value for Pred as well - if (Pred != &KillMBB) + if (Pred != &UseMBB) WorkList.push_back(Pred->getNumber()); else - // Loopback to KillMBB, so value is really live through. - Kill = SlotIndex(); + // Loopback to UseMBB, so value is really live through. + Use = SlotIndex(); } } @@ -338,9 +341,9 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, E = WorkList.end(); I != E; ++I) { SlotIndex Start, End; std::tie(Start, End) = Indexes->getMBBRange(*I); - // Trim the live range in KillMBB. 
- if (*I == KillMBBNum && Kill.isValid()) - End = Kill; + // Trim the live range in UseMBB. + if (*I == UseMBBNum && Use.isValid()) + End = Use; else Map[MF->getBlockNumbered(*I)] = LiveOutPair(TheVNI, nullptr); Updater.add(Start, End, TheVNI); @@ -355,8 +358,8 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { MachineBasicBlock *MBB = MF->getBlockNumbered(*I); addLiveInBlock(LR, DomTree->getNode(MBB)); - if (MBB == &KillMBB) - LiveIn.back().Kill = Kill; + if (MBB == &UseMBB) + LiveIn.back().Kill = Use; } return false; diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h index 1b9099b..34d9953 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h @@ -101,17 +101,17 @@ class LiveRangeCalc { /// used to add entries directly. SmallVector<LiveInBlock, 16> LiveIn; - /// Assuming that LI is live-in to KillMBB and killed at Kill, find the set - /// of defs that can reach it. + /// Assuming that @p LR is live-in to @p UseMBB, find the set of defs that can + /// reach it. /// - /// If only one def can reach Kill, all paths from the def to kill are added - /// to LI, and the function returns true. + /// If only one def can reach @p UseMBB, all paths from the def to @p UseMBB + /// are added to @p LR, and the function returns true. /// - /// If multiple values can reach Kill, the blocks that need LI to be live in - /// are added to the LiveIn array, and the function returns false. + /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be + /// live in are added to the LiveIn array, and the function returns false. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - bool findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, + bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, SlotIndex Kill, unsigned PhysReg); /// updateSSA - Compute the values that will be live in to all requested @@ -162,15 +162,14 @@ public: // Modify existing live ranges. // - /// extend - Extend the live range of LI to reach Kill. + /// Extend the live range of @p LR to reach @p Use. /// - /// The existing values in LI must be live so they jointly dominate Kill. If - /// Kill is not dominated by a single existing value, PHI-defs are inserted - /// as required to preserve SSA form. If Kill is known to be dominated by a - /// single existing value, Alloc may be null. + /// The existing values in @p LR must be live so they jointly dominate @p Use. + /// If @p Use is not dominated by a single existing value, PHI-defs are + /// inserted as required to preserve SSA form. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - void extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg = 0); + void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg = 0); /// createDeadDefs - Create a dead def in LI for every def operand of Reg. /// Each instruction defining Reg gets a new VNInfo with a corresponding @@ -188,7 +187,7 @@ public: /// Calculates liveness for the register specified in live interval @p LI. /// Creates subregister live ranges as needed if subreg liveness tracking is /// enabled. - void calculate(LiveInterval &LI); + void calculate(LiveInterval &LI, bool TrackSubRegs); //===--------------------------------------------------------------------===// // Low-level interface. 
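The LiveInterval.cpp hunk earlier in this merge also rewrote LiveRange::removeValNo from a manual backwards erase loop into the standard erase/remove_if idiom. A minimal standalone sketch of that idiom, with a plain int vector standing in for the segment list and a hypothetical DeadValNo in place of the VNInfo pointer (illustrative only, not code from the patch):

    #include <algorithm>
    #include <vector>

    int main() {
      std::vector<int> Segments = {1, 2, 1, 3, 1};
      const int DeadValNo = 1; // stands in for the VNInfo being removed
      // remove_if compacts the surviving elements to the front and returns the
      // new logical end; erase then drops the dead tail in a single call.
      Segments.erase(std::remove_if(Segments.begin(), Segments.end(),
                                    [DeadValNo](int S) { return S == DeadValNo; }),
                     Segments.end());
      // Segments now holds {2, 3}.
      return 0;
    }

Unlike the old loop, this never erases from the middle of the vector one element at a time, so the rewrite is a single O(n) pass instead of a potentially quadratic sequence of mid-vector erases.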
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 47da205..27c57d5 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -196,7 +196,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
   // We also need to make sure it is safe to move the load.
   // Assume there are stores between DefMI and UseMI.
   bool SawStore = true;
-  if (!DefMI->isSafeToMove(&TII, nullptr, SawStore))
+  if (!DefMI->isSafeToMove(nullptr, SawStore))
     return false;
 
   DEBUG(dbgs() << "Try to fold single def: " << *DefMI
@@ -235,7 +235,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
   // Use the same criteria as DeadMachineInstructionElim.
   bool SawStore = false;
-  if (!MI->isSafeToMove(&TII, nullptr, SawStore)) {
+  if (!MI->isSafeToMove(nullptr, SawStore)) {
     DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
     return;
   }
@@ -256,15 +256,8 @@
       // Check if MI reads any unreserved physregs.
       if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
         ReadsPhysRegs = true;
-      else if (MOI->isDef()) {
-        for (MCRegUnitIterator Units(Reg, MRI.getTargetRegisterInfo());
-             Units.isValid(); ++Units) {
-          if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) {
-            if (VNInfo *VNI = LR->getVNInfoAt(Idx))
-              LR->removeValNo(VNI);
-          }
-        }
-      }
+      else if (MOI->isDef())
+        LIS.removePhysRegDefAt(Reg, Idx);
       continue;
     }
     LiveInterval &LI = LIS.getInterval(Reg);
@@ -280,21 +273,11 @@
 
     // Remove defined value.
     if (MOI->isDef()) {
-      if (VNInfo *VNI = LI.getVNInfoAt(Idx)) {
-        if (TheDelegate)
-          TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
-        LI.removeValNo(VNI);
-        if (LI.empty()) {
-          RegsToErase.push_back(Reg);
-        } else {
-          // Also remove the value in subranges.
-          for (LiveInterval::SubRange &S : LI.subranges()) {
-            if (VNInfo *SVNI = S.getVNInfoAt(Idx))
-              S.removeValNo(SVNI);
-          }
-          LI.removeEmptySubRanges();
-        }
-      }
+      if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr)
+        TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
+      LIS.removeVRegDefAt(LI, Idx);
+      if (LI.empty())
+        RegsToErase.push_back(Reg);
     }
   }
 
@@ -416,7 +399,7 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
   VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI);
   for (unsigned I = 0, Size = size(); I < Size; ++I) {
     LiveInterval &LI = LIS.getInterval(get(I));
-    if (MRI.recomputeRegClass(LI.reg, MF.getTarget()))
+    if (MRI.recomputeRegClass(LI.reg))
       DEBUG({
         const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
         dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
index 8a6ac25..5c9c679 100644
--- a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
@@ -61,8 +61,10 @@ LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
   assert(Slot >= 0 && "Spill slot index must be >= 0");
   SS2IntervalMap::iterator I = S2IMap.find(Slot);
   if (I == S2IMap.end()) {
-    I = S2IMap.insert(I, std::make_pair(Slot,
-        LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F)));
+    I = S2IMap.emplace(std::piecewise_construct, std::forward_as_tuple(Slot),
+                       std::forward_as_tuple(
+                           TargetRegisterInfo::index2StackSlot(Slot), 0.0F))
+            .first;
     S2RCMap.insert(std::make_pair(Slot, RC));
   } else {
     // Use the largest common subclass register class.
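The LiveStacks::getOrCreateInterval change above swaps insert(std::make_pair(...)) for map::emplace with std::piecewise_construct, so the mapped value is constructed in place inside the map node rather than built as a temporary pair and copied in. A minimal sketch of the idiom, using a hypothetical Interval type rather than anything from the patch:

    #include <map>
    #include <tuple>

    struct Interval {
      Interval(unsigned Reg, float Weight) : Reg(Reg), Weight(Weight) {}
      unsigned Reg;
      float Weight;
    };

    int main() {
      std::map<int, Interval> S2IMap;
      int Slot = 4;
      // Forward (Slot) to the key's constructor and (Reg, Weight) to the
      // Interval constructor; the Interval is built directly in the node.
      auto I = S2IMap.emplace(std::piecewise_construct,
                              std::forward_as_tuple(Slot),
                              std::forward_as_tuple(16u, 0.0F))
                   .first;
      return I->second.Reg == 16u ? 0 : 1;
    }

This form also works for mapped types that are expensive to copy or not copyable at all, which the make_pair-based insert cannot handle.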
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index c4bca5f..11deb81 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include <algorithm> using namespace llvm; diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index e8bf687..8378429 100644 --- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -252,7 +252,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { } static inline bool -lookupCandidateBaseReg(int64_t BaseOffset, +lookupCandidateBaseReg(unsigned BaseReg, + int64_t BaseOffset, int64_t FrameSizeAdjust, int64_t LocalFrameOffset, const MachineInstr *MI, @@ -260,7 +261,7 @@ lookupCandidateBaseReg(int64_t BaseOffset, // Check if the relative offset from the where the base register references // to the target address is in range for the instruction. int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset; - return TRI->isFrameOffsetLegal(MI, Offset); + return TRI->isFrameOffsetLegal(MI, BaseReg, Offset); } bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { @@ -362,8 +363,9 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // instruction itself will be taken into account by the target, // so we don't have to adjust for it here when reusing a base // register. - if (UsedBaseReg && lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust, - LocalOffset, MI, TRI)) { + if (UsedBaseReg && lookupCandidateBaseReg(BaseReg, BaseOffset, + FrameSizeAdjust, LocalOffset, MI, + TRI)) { DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n"); // We found a register to reuse. Offset = FrameSizeAdjust + LocalOffset - BaseOffset; @@ -382,7 +384,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // then don't bother creating it. 
if (ref + 1 >= e || !lookupCandidateBaseReg( - BaseOffset, FrameSizeAdjust, + BaseReg, BaseOffset, FrameSizeAdjust, FrameReferenceInsns[ref + 1].getLocalOffset(), FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) { BaseOffset = PrevBaseOffset; diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index 3c73905..d5fdf8e 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -53,7 +53,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); - CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + + CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber())); } @@ -250,7 +250,7 @@ std::string MachineBasicBlock::getFullName() const { if (getBasicBlock()) Name += getBasicBlock()->getName(); else - Name += (Twine("BB") + Twine(getNumber())).str(); + Name += ("BB" + Twine(getNumber())).str(); return Name; } @@ -307,7 +307,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << '\t'; if (I->isInsideBundle()) OS << " * "; - I->print(OS, &getParent()->getTarget()); + I->print(OS); } // Print the successors of this block according to the CFG. @@ -1129,21 +1129,19 @@ getWeightIterator(MachineBasicBlock::const_succ_iterator I) const { /// instructions after (searching just for defs) MI. MachineBasicBlock::LivenessQueryResult MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, - unsigned Reg, MachineInstr *MI, - unsigned Neighborhood) { + unsigned Reg, const_iterator Before, + unsigned Neighborhood) const { unsigned N = Neighborhood; - MachineBasicBlock *MBB = MI->getParent(); - // Start by searching backwards from MI, looking for kills, reads or defs. - - MachineBasicBlock::iterator I(MI); + // Start by searching backwards from Before, looking for kills, reads or defs. + const_iterator I(Before); // If this is the first insn in the block, don't search backwards. - if (I != MBB->begin()) { + if (I != begin()) { do { --I; MachineOperandIteratorBase::PhysRegInfo Analysis = - MIOperands(I).analyzePhysReg(Reg, TRI); + ConstMIOperands(I).analyzePhysReg(Reg, TRI); if (Analysis.Defines) // Outputs happen after inputs so they take precedence if both are @@ -1158,15 +1156,15 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, // Defined or read without a previous kill - live. return Analysis.Reads ? LQR_Live : LQR_OverlappingLive; - } while (I != MBB->begin() && --N > 0); + } while (I != begin() && --N > 0); } // Did we get to the start of the block? - if (I == MBB->begin()) { + if (I == begin()) { // If so, the register's state is definitely defined by the live-in state. for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); RAI.isValid(); ++RAI) { - if (MBB->isLiveIn(*RAI)) + if (isLiveIn(*RAI)) return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive; } @@ -1175,13 +1173,13 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, N = Neighborhood; - // Try searching forwards from MI, looking for reads or defs. - I = MachineBasicBlock::iterator(MI); + // Try searching forwards from Before, looking for reads or defs. + I = const_iterator(Before); // If this is the last insn in the block, don't search forwards. 
- if (I != MBB->end()) { - for (++I; I != MBB->end() && N > 0; ++I, --N) { + if (I != end()) { + for (++I; I != end() && N > 0; ++I, --N) { MachineOperandIteratorBase::PhysRegInfo Analysis = - MIOperands(I).analyzePhysReg(Reg, TRI); + ConstMIOperands(I).analyzePhysReg(Reg, TRI); if (Analysis.ReadsOverlap) // Used, therefore must have been live. diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp index aaa7d91..2969bad 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -40,13 +41,14 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; -#define DEBUG_TYPE "block-placement2" +#define DEBUG_TYPE "block-placement" STATISTIC(NumCondBranches, "Number of conditional branches"); STATISTIC(NumUncondBranches, "Number of uncondittional branches"); @@ -61,11 +63,23 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks", cl::init(0), cl::Hidden); // FIXME: Find a good default for this flag and remove the flag. -static cl::opt<unsigned> -ExitBlockBias("block-placement-exit-block-bias", - cl::desc("Block frequency percentage a loop exit block needs " - "over the original exit to be considered the new exit."), - cl::init(0), cl::Hidden); +static cl::opt<unsigned> ExitBlockBias( + "block-placement-exit-block-bias", + cl::desc("Block frequency percentage a loop exit block needs " + "over the original exit to be considered the new exit."), + cl::init(0), cl::Hidden); + +static cl::opt<bool> OutlineOptionalBranches( + "outline-optional-branches", + cl::desc("Put completely optional branches, i.e. branches with a common " + "post dominator, out of line."), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> OutlineOptionalThreshold( + "outline-optional-threshold", + cl::desc("Don't outline optional branches that are a single block with an " + "instruction count below this threshold"), + cl::init(4), cl::Hidden); namespace { class BlockChain; @@ -107,7 +121,7 @@ public: /// function. It also registers itself as the chain that block participates /// in with the BlockToChain mapping. BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) - : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { + : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { assert(BB && "Cannot create a chain with a null basic block"); BlockToChain[BB] = this; } @@ -144,19 +158,18 @@ public: // Update the incoming blocks to point to this chain, and add them to the // chain structure. 
- for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end(); - BI != BE; ++BI) { - Blocks.push_back(*BI); - assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain"); - BlockToChain[*BI] = this; + for (MachineBasicBlock *ChainBB : *Chain) { + Blocks.push_back(ChainBB); + assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain"); + BlockToChain[ChainBB] = this; } } #ifndef NDEBUG /// \brief Dump the blocks in this chain. LLVM_DUMP_METHOD void dump() { - for (iterator I = begin(), E = end(); I != E; ++I) - (*I)->dump(); + for (MachineBasicBlock *MBB : *this) + MBB->dump(); } #endif // NDEBUG @@ -188,6 +201,13 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief A handle to the target's lowering info. const TargetLoweringBase *TLI; + /// \brief A handle to the post dominator tree. + MachineDominatorTree *MDT; + + /// \brief A set of blocks that are unavoidably execute, i.e. they dominate + /// all terminators of the MachineFunction. + SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks; + /// \brief Allocator and owner of BlockChain structures. /// /// We build BlockChains lazily while processing the loop structure of @@ -205,28 +225,26 @@ class MachineBlockPlacement : public MachineFunctionPass { /// between basic blocks. DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain; - void markChainSuccessors(BlockChain &Chain, - MachineBasicBlock *LoopHeaderBB, + void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter); - MachineBasicBlock *selectBestCandidateBlock( - BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList, - const BlockFilterSet *BlockFilter); - MachineBasicBlock *getFirstUnplacedBlock( - MachineFunction &F, - const BlockChain &PlacedChain, - MachineFunction::iterator &PrevUnplacedBlockIt, - const BlockFilterSet *BlockFilter); + MachineBasicBlock * + selectBestCandidateBlock(BlockChain &Chain, + SmallVectorImpl<MachineBasicBlock *> &WorkList, + const BlockFilterSet *BlockFilter); + MachineBasicBlock * + getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); - MachineBasicBlock *findBestLoopExit(MachineFunction &F, - MachineLoop &L, + MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L, const BlockFilterSet &LoopBlockSet); void buildLoopChains(MachineFunction &F, MachineLoop &L); void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB, @@ -244,6 +262,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -252,12 +271,13 @@ public: char MachineBlockPlacement::ID = 0; char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; -INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2", +INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement", "Branch Probability 
Basic Block Placement", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", +INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement", "Branch Probability Basic Block Placement", false, false) #ifndef NDEBUG @@ -267,8 +287,8 @@ INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", static std::string getBlockName(MachineBasicBlock *BB) { std::string Result; raw_string_ostream OS(Result); - OS << "BB#" << BB->getNumber() - << " (derived from LLVM BB '" << BB->getName() << "')"; + OS << "BB#" << BB->getNumber(); + OS << " (derived from LLVM BB '" << BB->getName() << "')"; OS.flush(); return Result; } @@ -292,26 +312,22 @@ static std::string getBlockNum(MachineBasicBlock *BB) { /// having one fewer active predecessor. It also adds any successors of this /// chain which reach the zero-predecessor state to the worklist passed in. void MachineBlockPlacement::markChainSuccessors( - BlockChain &Chain, - MachineBasicBlock *LoopHeaderBB, + BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter) { // Walk all the blocks in this chain, marking their successors as having // a predecessor placed. - for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end(); - CBI != CBE; ++CBI) { + for (MachineBasicBlock *MBB : Chain) { // Add any successors for which this is the only un-placed in-loop // predecessor to the worklist as a viable candidate for CFG-neutral // placement. No subsequent placement of this block will violate the CFG // shape, so we get to use heuristics to choose a favorable placement. - for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(), - SE = (*CBI)->succ_end(); - SI != SE; ++SI) { - if (BlockFilter && !BlockFilter->count(*SI)) + for (MachineBasicBlock *Succ : MBB->successors()) { + if (BlockFilter && !BlockFilter->count(Succ)) continue; - BlockChain &SuccChain = *BlockToChain[*SI]; + BlockChain &SuccChain = *BlockToChain[Succ]; // Disregard edges within a fixed chain, or edges to the loop header. - if (&Chain == &SuccChain || *SI == LoopHeaderBB) + if (&Chain == &SuccChain || Succ == LoopHeaderBB) continue; // This is a cross-chain edge that is within the loop, so decrement the @@ -331,9 +347,10 @@ void MachineBlockPlacement::markChainSuccessors( /// very hot successor edges. /// /// \returns The best successor block found, or null if none are viable. 
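The selectBestSuccessor changes in the hunks that follow keep the pass's existing heuristic: a successor must either be at least 80% likely (HotProb) or violate no CFG constraint, and even a hot successor is rejected when some other predecessor edge into it is globally more important. A toy recomputation of that frequency comparison; Prob, scale and all the numbers are simplified stand-ins, not LLVM's BranchProbability/BlockFrequency API.

    #include <cstdint>
    #include <cstdio>

    // Simplified stand-ins for BranchProbability (N/D) and BlockFrequency.
    struct Prob {
      uint32_t N, D;
    };
    static Prob complement(Prob P) { return {P.D - P.N, P.D}; }
    static uint64_t scale(uint64_t Freq, Prob P) { return Freq * P.N / P.D; }

    int main() {
      const Prob HotProb = {4, 5}; // the 80% threshold used by the pass
      // BB -> Succ is very hot, but Pred also branches into Succ.
      uint64_t FreqBB = 1000, FreqPred = 300;
      Prob SuccProb = {9, 10}, PredProb = {1, 2};
      // A hot successor is only taken when no other predecessor edge is
      // globally more important: compare freq(BB) * prob * (1 - HotProb)
      // against freq(Pred) * prob(Pred -> Succ).
      uint64_t CandidateEdgeFreq =
          scale(scale(FreqBB, SuccProb), complement(HotProb));
      uint64_t PredEdgeFreq = scale(FreqPred, PredProb);
      std::puts(PredEdgeFreq >= CandidateEdgeFreq ? "CFG conflict, skip"
                                                  : "take hot successor");
    }
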
-MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( - MachineBasicBlock *BB, BlockChain &Chain, - const BlockFilterSet *BlockFilter) { +MachineBasicBlock * +MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, + BlockChain &Chain, + const BlockFilterSet *BlockFilter) { const BranchProbability HotProb(4, 5); // 80% MachineBasicBlock *BestSucc = nullptr; @@ -347,65 +364,85 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( uint32_t WeightScale = 0; uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale); DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); - SI != SE; ++SI) { - if (BlockFilter && !BlockFilter->count(*SI)) + for (MachineBasicBlock *Succ : BB->successors()) { + if (BlockFilter && !BlockFilter->count(Succ)) continue; - BlockChain &SuccChain = *BlockToChain[*SI]; + BlockChain &SuccChain = *BlockToChain[Succ]; if (&SuccChain == &Chain) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n"); + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Already merged!\n"); continue; } - if (*SI != *SuccChain.begin()) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n"); + if (Succ != *SuccChain.begin()) { + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n"); continue; } - uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI); + uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ); BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + // If we outline optional branches, look whether Succ is unavoidable, i.e. + // dominates all terminators of the MachineFunction. If it does, other + // successors must be optional. Don't do this for cold branches. + if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() && + UnavoidableBlocks.count(Succ) > 0) { + auto HasShortOptionalBranch = [&]() { + for (MachineBasicBlock *Pred : Succ->predecessors()) { + // Check whether there is an unplaced optional branch. + if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) || + BlockToChain[Pred] == &Chain) + continue; + // Check whether the optional branch has exactly one BB. + if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB) + continue; + // Check whether the optional branch is small. + if (Pred->size() < OutlineOptionalThreshold) + return true; + } + return false; + }; + if (!HasShortOptionalBranch()) + return Succ; + } + // Only consider successors which are either "hot", or wouldn't violate // any CFG constraints. if (SuccChain.LoopPredecessors != 0) { if (SuccProb < HotProb) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb << " (prob) (CFG conflict)\n"); continue; } - // Make sure that a hot successor doesn't have a globally more important - // predecessor. - BlockFrequency CandidateEdgeFreq - = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); + // Make sure that a hot successor doesn't have a globally more + // important predecessor. 
+ BlockFrequency CandidateEdgeFreq = + MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); bool BadCFGConflict = false; - for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(), - PE = (*SI)->pred_end(); - PI != PE; ++PI) { - if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) || - BlockToChain[*PI] == &Chain) + for (MachineBasicBlock *Pred : Succ->predecessors()) { + if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) || + BlockToChain[Pred] == &Chain) continue; - BlockFrequency PredEdgeFreq - = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI); + BlockFrequency PredEdgeFreq = + MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ); if (PredEdgeFreq >= CandidateEdgeFreq) { BadCFGConflict = true; break; } } if (BadCFGConflict) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb << " (prob) (non-cold CFG conflict)\n"); continue; } } - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb << " (prob)" << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") << "\n"); if (BestSucc && BestWeight >= SuccWeight) continue; - BestSucc = *SI; + BestSucc = Succ; BestWeight = SuccWeight; } return BestSucc; @@ -430,29 +467,26 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( // some code complexity) into the loop below. WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), [&](MachineBasicBlock *BB) { - return BlockToChain.lookup(BB) == &Chain; - }), + return BlockToChain.lookup(BB) == &Chain; + }), WorkList.end()); MachineBasicBlock *BestBlock = nullptr; BlockFrequency BestFreq; - for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(), - WBE = WorkList.end(); - WBI != WBE; ++WBI) { - BlockChain &SuccChain = *BlockToChain[*WBI]; + for (MachineBasicBlock *MBB : WorkList) { + BlockChain &SuccChain = *BlockToChain[MBB]; if (&SuccChain == &Chain) { - DEBUG(dbgs() << " " << getBlockName(*WBI) - << " -> Already merged!\n"); + DEBUG(dbgs() << " " << getBlockName(MBB) << " -> Already merged!\n"); continue; } assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); - BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); - DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> "; - MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); + BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB); + DEBUG(dbgs() << " " << getBlockName(MBB) << " -> "; + MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); if (BestBlock && BestFreq >= CandidateFreq) continue; - BestBlock = *WBI; + BestBlock = MBB; BestFreq = CandidateFreq; } return BestBlock; @@ -485,8 +519,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( } void MachineBlockPlacement::buildChain( - MachineBasicBlock *BB, - BlockChain &Chain, + MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter) { assert(BB); @@ -513,8 +546,8 @@ void MachineBlockPlacement::buildChain( BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); if (!BestSucc) { - BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, - BlockFilter); + BestSucc = + getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, BlockFilter); if (!BestSucc) break; @@ -527,8 +560,8 @@ void MachineBlockPlacement::buildChain( // Zero out LoopPredecessors for the successor we're about to merge in case // we selected a 
successor that didn't fit naturally into the CFG. SuccChain.LoopPredecessors = 0; - DEBUG(dbgs() << "Merging from " << getBlockNum(BB) - << " to " << getBlockNum(BestSucc) << "\n"); + DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to " + << getBlockNum(BestSucc) << "\n"); markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); Chain.merge(BestSucc, &SuccChain); BB = *std::prev(Chain.end()); @@ -558,20 +591,17 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, if (!LoopBlockSet.count(*HeaderChain.begin())) return L.getHeader(); - DEBUG(dbgs() << "Finding best loop top for: " - << getBlockName(L.getHeader()) << "\n"); + DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader()) + << "\n"); BlockFrequency BestPredFreq; MachineBasicBlock *BestPred = nullptr; - for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(), - PE = L.getHeader()->pred_end(); - PI != PE; ++PI) { - MachineBasicBlock *Pred = *PI; + for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) { if (!LoopBlockSet.count(Pred)) continue; DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " << Pred->succ_size() << " successors, "; - MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); + MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); if (Pred->succ_size() > 1) continue; @@ -598,15 +628,13 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, return BestPred; } - /// \brief Find the best loop exiting block for layout. /// /// This routine implements the logic to analyze the loop looking for the best /// block to layout at the top of the loop. Typically this is done to maximize /// fallthrough opportunities. MachineBasicBlock * -MachineBlockPlacement::findBestLoopExit(MachineFunction &F, - MachineLoop &L, +MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, const BlockFilterSet &LoopBlockSet) { // We don't want to layout the loop linearly in all cases. If the loop header // is just a normal basic block in the loop, we want to look for what block @@ -628,15 +656,13 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, // blocks where rotating to exit with that block will reach an outer loop. SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop; - DEBUG(dbgs() << "Finding best loop exit for: " - << getBlockName(L.getHeader()) << "\n"); - for (MachineLoop::block_iterator I = L.block_begin(), - E = L.block_end(); - I != E; ++I) { - BlockChain &Chain = *BlockToChain[*I]; + DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader()) + << "\n"); + for (MachineBasicBlock *MBB : L.getBlocks()) { + BlockChain &Chain = *BlockToChain[MBB]; // Ensure that this block is at the end of a chain; otherwise it could be - // mid-way through an inner loop or a successor of an analyzable branch. - if (*I != *std::prev(Chain.end())) + // mid-way through an inner loop or a successor of an unanalyzable branch. + if (MBB != *std::prev(Chain.end())) continue; // Now walk the successors. We need to establish whether this has a viable @@ -650,59 +676,56 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, // the MBPI analysis, we use the internal weights and manually compute the // probabilities to avoid quadratic behavior. 
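The comment above notes that probabilities are computed manually from the raw edge weights to avoid quadratic behavior: the weight sum is computed once per block, and each edge probability becomes a single division instead of a per-edge re-walk of the successor list. A guess at what that looks like in isolation; the power-of-two down-scaling that keeps the numerator/denominator in 32 bits is an assumption here, not LLVM's exact getSumForBlock logic.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<uint64_t> SuccWeights = {1ull << 31, 3ull << 30, 1ull << 29};
      uint64_t Sum = 0;
      for (uint64_t W : SuccWeights) // one pass over the successors
        Sum += W;
      // Down-scale so each probability fits a 32-bit numerator/denominator.
      uint64_t WeightScale = 1;
      while (Sum / WeightScale > UINT32_MAX)
        WeightScale *= 2;
      for (uint64_t W : SuccWeights)
        std::printf("prob = %llu/%llu\n",
                    (unsigned long long)(W / WeightScale),
                    (unsigned long long)(Sum / WeightScale));
    }
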
uint32_t WeightScale = 0; - uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale); - for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(), - SE = (*I)->succ_end(); - SI != SE; ++SI) { - if ((*SI)->isLandingPad()) + uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale); + for (MachineBasicBlock *Succ : MBB->successors()) { + if (Succ->isLandingPad()) continue; - if (*SI == *I) + if (Succ == MBB) continue; - BlockChain &SuccChain = *BlockToChain[*SI]; + BlockChain &SuccChain = *BlockToChain[Succ]; // Don't split chains, either this chain or the successor's chain. if (&Chain == &SuccChain) { - DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " - << getBlockName(*SI) << " (chain conflict)\n"); + DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " (chain conflict)\n"); continue; } - uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI); - if (LoopBlockSet.count(*SI)) { - DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> " - << getBlockName(*SI) << " (" << SuccWeight << ")\n"); + uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ); + if (LoopBlockSet.count(Succ)) { + DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " (" << SuccWeight << ")\n"); HasLoopingSucc = true; continue; } unsigned SuccLoopDepth = 0; - if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) { + if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) { SuccLoopDepth = ExitLoop->getLoopDepth(); if (ExitLoop->contains(&L)) - BlocksExitingToOuterLoop.insert(*I); + BlocksExitingToOuterLoop.insert(MBB); } BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); - BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; - DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " - << getBlockName(*SI) << " [L:" << SuccLoopDepth - << "] ("; - MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); + BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb; + DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("; + MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); // Note that we bias this toward an existing layout successor to retain // incoming order in the absence of better information. The exit must have // a frequency higher than the current exit before we consider breaking // the layout. BranchProbability Bias(100 - ExitBlockBias, 100); - if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || + if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth || ExitEdgeFreq > BestExitEdgeFreq || - ((*I)->isLayoutSuccessor(*SI) && + (MBB->isLayoutSuccessor(Succ) && !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) { BestExitEdgeFreq = ExitEdgeFreq; - ExitingBB = *I; + ExitingBB = MBB; } } - // Restore the old exiting state, no viable looping successor was found. if (!HasLoopingSucc) { + // Restore the old exiting state, no viable looping successor was found. 
ExitingBB = OldExitingBB; BestExitEdgeFreq = OldBestExitEdgeFreq; continue; @@ -738,12 +761,10 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, MachineBasicBlock *Top = *LoopChain.begin(); bool ViableTopFallthrough = false; - for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(), - PE = Top->pred_end(); - PI != PE; ++PI) { - BlockChain *PredChain = BlockToChain[*PI]; - if (!LoopBlockSet.count(*PI) && - (!PredChain || *PI == *std::prev(PredChain->end()))) { + for (MachineBasicBlock *Pred : Top->predecessors()) { + BlockChain *PredChain = BlockToChain[Pred]; + if (!LoopBlockSet.count(Pred) && + (!PredChain || Pred == *std::prev(PredChain->end()))) { ViableTopFallthrough = true; break; } @@ -754,18 +775,16 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, // introduce an unnecessary branch. if (ViableTopFallthrough) { MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); - for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(), - SE = Bottom->succ_end(); - SI != SE; ++SI) { - BlockChain *SuccChain = BlockToChain[*SI]; - if (!LoopBlockSet.count(*SI) && - (!SuccChain || *SI == *SuccChain->begin())) + for (MachineBasicBlock *Succ : Bottom->successors()) { + BlockChain *SuccChain = BlockToChain[Succ]; + if (!LoopBlockSet.count(Succ) && + (!SuccChain || Succ == *SuccChain->begin())) return; } } - BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(), - ExitingBB); + BlockChain::iterator ExitIt = + std::find(LoopChain.begin(), LoopChain.end(), ExitingBB); if (ExitIt == LoopChain.end()) return; @@ -782,8 +801,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, MachineLoop &L) { // First recurse through any nested loops, building chains for those inner // loops. - for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) - buildLoopChains(F, **LI); + for (MachineLoop *InnerLoop : L) + buildLoopChains(F, *InnerLoop); SmallVector<MachineBasicBlock *, 16> BlockWorkList; BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); @@ -809,21 +828,16 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, SmallPtrSet<BlockChain *, 4> UpdatedPreds; assert(LoopChain.LoopPredecessors == 0); UpdatedPreds.insert(&LoopChain); - for (MachineLoop::block_iterator BI = L.block_begin(), - BE = L.block_end(); - BI != BE; ++BI) { - BlockChain &Chain = *BlockToChain[*BI]; + for (MachineBasicBlock *LoopBB : L.getBlocks()) { + BlockChain &Chain = *BlockToChain[LoopBB]; if (!UpdatedPreds.insert(&Chain).second) continue; assert(Chain.LoopPredecessors == 0); - for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); - BCI != BCE; ++BCI) { - assert(BlockToChain[*BCI] == &Chain); - for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), - PE = (*BCI)->pred_end(); - PI != PE; ++PI) { - if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI)) + for (MachineBasicBlock *ChainBB : Chain) { + assert(BlockToChain[ChainBB] == &Chain); + for (MachineBasicBlock *Pred : ChainBB->predecessors()) { + if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred)) continue; ++Chain.LoopPredecessors; } @@ -845,29 +859,26 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; } - for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); - BCI != BCE; ++BCI) { - dbgs() << " ... 
" << getBlockName(*BCI) << "\n"; - if (!LoopBlockSet.erase(*BCI)) { + for (MachineBasicBlock *ChainBB : LoopChain) { + dbgs() << " ... " << getBlockName(ChainBB) << "\n"; + if (!LoopBlockSet.erase(ChainBB)) { // We don't mark the loop as bad here because there are real situations // where this can occur. For example, with an unanalyzable fallthrough // from a loop block to a non-loop block or vice versa. dbgs() << "Loop chain contains a block not contained by the loop!\n" << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" - << " Bad block: " << getBlockName(*BCI) << "\n"; + << " Bad block: " << getBlockName(ChainBB) << "\n"; } } if (!LoopBlockSet.empty()) { BadLoop = true; - for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(), - LBE = LoopBlockSet.end(); - LBI != LBE; ++LBI) + for (MachineBasicBlock *LoopBB : LoopBlockSet) dbgs() << "Loop contains blocks never placed into a chain!\n" << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" - << " Bad block: " << getBlockName(*LBI) << "\n"; + << " Bad block: " << getBlockName(LoopBB) << "\n"; } assert(!BadLoop && "Detected problems with the placement of this loop."); }); @@ -879,8 +890,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { MachineBasicBlock *BB = FI; - BlockChain *Chain - = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); + BlockChain *Chain = + new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); // Also, merge any blocks which we cannot reason about and must preserve // the exact fallthrough behavior for. for (;;) { @@ -903,28 +914,44 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } } + if (OutlineOptionalBranches) { + // Find the nearest common dominator of all of F's terminators. + MachineBasicBlock *Terminator = nullptr; + for (MachineBasicBlock &MBB : F) { + if (MBB.succ_size() == 0) { + if (Terminator == nullptr) + Terminator = &MBB; + else + Terminator = MDT->findNearestCommonDominator(Terminator, &MBB); + } + } + + // MBBs dominating this common dominator are unavoidable. + UnavoidableBlocks.clear(); + for (MachineBasicBlock &MBB : F) { + if (MDT->dominates(&MBB, Terminator)) { + UnavoidableBlocks.insert(&MBB); + } + } + } + // Build any loop-based chains. 
- for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE; - ++LI) - buildLoopChains(F, **LI); + for (MachineLoop *L : *MLI) + buildLoopChains(F, *L); SmallVector<MachineBasicBlock *, 16> BlockWorkList; SmallPtrSet<BlockChain *, 4> UpdatedPreds; - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - MachineBasicBlock *BB = &*FI; - BlockChain &Chain = *BlockToChain[BB]; + for (MachineBasicBlock &MBB : F) { + BlockChain &Chain = *BlockToChain[&MBB]; if (!UpdatedPreds.insert(&Chain).second) continue; assert(Chain.LoopPredecessors == 0); - for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); - BCI != BCE; ++BCI) { - assert(BlockToChain[*BCI] == &Chain); - for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), - PE = (*BCI)->pred_end(); - PI != PE; ++PI) { - if (BlockToChain[*PI] == &Chain) + for (MachineBasicBlock *ChainBB : Chain) { + assert(BlockToChain[ChainBB] == &Chain); + for (MachineBasicBlock *Pred : ChainBB->predecessors()) { + if (BlockToChain[Pred] == &Chain) continue; ++Chain.LoopPredecessors; } @@ -944,46 +971,40 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Crash at the end so we get all of the debugging output first. bool BadFunc = false; FunctionBlockSetType FunctionBlockSet; - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) - FunctionBlockSet.insert(FI); + for (MachineBasicBlock &MBB : F) + FunctionBlockSet.insert(&MBB); - for (BlockChain::iterator BCI = FunctionChain.begin(), - BCE = FunctionChain.end(); - BCI != BCE; ++BCI) - if (!FunctionBlockSet.erase(*BCI)) { + for (MachineBasicBlock *ChainBB : FunctionChain) + if (!FunctionBlockSet.erase(ChainBB)) { BadFunc = true; dbgs() << "Function chain contains a block not in the function!\n" - << " Bad block: " << getBlockName(*BCI) << "\n"; + << " Bad block: " << getBlockName(ChainBB) << "\n"; } if (!FunctionBlockSet.empty()) { BadFunc = true; - for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(), - FBE = FunctionBlockSet.end(); - FBI != FBE; ++FBI) + for (MachineBasicBlock *RemainingBB : FunctionBlockSet) dbgs() << "Function contains blocks never placed into a chain!\n" - << " Bad block: " << getBlockName(*FBI) << "\n"; + << " Bad block: " << getBlockName(RemainingBB) << "\n"; } assert(!BadFunc && "Detected problems with the block placement."); }); // Splice the blocks into place. MachineFunction::iterator InsertPos = F.begin(); - for (BlockChain::iterator BI = FunctionChain.begin(), - BE = FunctionChain.end(); - BI != BE; ++BI) { - DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain " - : " ... ") - << getBlockName(*BI) << "\n"); - if (InsertPos != MachineFunction::iterator(*BI)) - F.splice(InsertPos, *BI); + for (MachineBasicBlock *ChainBB : FunctionChain) { + DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain " + : " ... ") + << getBlockName(ChainBB) << "\n"); + if (InsertPos != MachineFunction::iterator(ChainBB)) + F.splice(InsertPos, ChainBB); else ++InsertPos; // Update the terminator of the previous block. 
- if (BI == FunctionChain.begin()) + if (ChainBB == *FunctionChain.begin()) continue; - MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI)); + MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(ChainBB)); // FIXME: It would be awesome of updateTerminator would just return rather // than assert when the branch cannot be analyzed in order to remove this @@ -993,16 +1014,16 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { // The "PrevBB" is not yet updated to reflect current code layout, so, // o. it may fall-through to a block without explict "goto" instruction - // before layout, and no longer fall-through it after layout; or + // before layout, and no longer fall-through it after layout; or // o. just opposite. - // + // // AnalyzeBranch() may return erroneous value for FBB when these two // situations take place. For the first scenario FBB is mistakenly set // NULL; for the 2nd scenario, the FBB, which is expected to be NULL, // is mistakenly pointing to "*BI". // bool needUpdateBr = true; - if (!Cond.empty() && (!FBB || FBB == *BI)) { + if (!Cond.empty() && (!FBB || FBB == ChainBB)) { PrevBB->updateTerminator(); needUpdateBr = false; Cond.clear(); @@ -1022,7 +1043,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { << getBlockName(PrevBB) << "\n"); DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB) << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n"); - DebugLoc dl; // FIXME: this is nowhere + DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PrevBB); TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); needUpdateBr = true; @@ -1043,33 +1064,33 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) + if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) return; if (FunctionChain.begin() == FunctionChain.end()) - return; // Empty chain. + return; // Empty chain. const BranchProbability ColdProb(1, 5); // 20% BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; - for (BlockChain::iterator BI = std::next(FunctionChain.begin()), - BE = FunctionChain.end(); - BI != BE; ++BI) { + for (MachineBasicBlock *ChainBB : FunctionChain) { + if (ChainBB == *FunctionChain.begin()) + continue; + // Don't align non-looping basic blocks. These are unlikely to execute // enough times to matter in practice. Note that we'll still handle // unnatural CFGs inside of a natural outer loop (the common case) and // rotated loops. - MachineLoop *L = MLI->getLoopFor(*BI); + MachineLoop *L = MLI->getLoopFor(ChainBB); if (!L) continue; unsigned Align = TLI->getPrefLoopAlignment(L); if (!Align) - continue; // Don't care about loop alignment. + continue; // Don't care about loop alignment. // If the block is cold relative to the function entry don't waste space // aligning it. 
- BlockFrequency Freq = MBFI->getBlockFreq(*BI); + BlockFrequency Freq = MBFI->getBlockFreq(ChainBB); if (Freq < WeightedEntryFreq) continue; @@ -1082,12 +1103,13 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Check for the existence of a non-layout predecessor which would benefit // from aligning this block. - MachineBasicBlock *LayoutPred = *std::prev(BI); + MachineBasicBlock *LayoutPred = + &*std::prev(MachineFunction::iterator(ChainBB)); // Force alignment if all the predecessors are jumps. We already checked // that the block isn't cold above. - if (!LayoutPred->isSuccessor(*BI)) { - (*BI)->setAlignment(Align); + if (!LayoutPred->isSuccessor(ChainBB)) { + ChainBB->setAlignment(Align); continue; } @@ -1095,10 +1117,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // cold relative to the block. When this is true, other predecessors make up // all of the hot entries into the block and thus alignment is likely to be // important. - BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); + BranchProbability LayoutProb = + MBPI->getEdgeProbability(LayoutPred, ChainBB); BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; if (LayoutEdgeFreq <= (Freq * ColdProb)) - (*BI)->setAlignment(Align); + ChainBB->setAlignment(Align); } } @@ -1115,6 +1138,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { MLI = &getAnalysis<MachineLoopInfo>(); TII = F.getSubtarget().getInstrInfo(); TLI = F.getSubtarget().getTargetLowering(); + MDT = &getAnalysis<MachineDominatorTree>(); assert(BlockToChain.empty()); buildCFGChains(F); @@ -1124,9 +1148,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { if (AlignAllBlock) // Align all of the blocks in the function to a specific alignment. - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) - FI->setAlignment(AlignAllBlock); + for (MachineBasicBlock &MBB : F) + MBB.setAlignment(AlignAllBlock); // We always return true as we have no way to track whether the final order // differs from the original order. @@ -1181,20 +1204,19 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); - for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BlockFrequency BlockFreq = MBFI->getBlockFreq(I); - Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches - : NumUncondBranches; - Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq - : UncondBranchTakenFreq; - for (MachineBasicBlock::succ_iterator SI = I->succ_begin(), - SE = I->succ_end(); - SI != SE; ++SI) { + for (MachineBasicBlock &MBB : F) { + BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB); + Statistic &NumBranches = + (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches; + Statistic &BranchTakenFreq = + (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq; + for (MachineBasicBlock *Succ : MBB.successors()) { // Skip if this successor is a fallthrough. 
- if (I->isLayoutSuccessor(*SI)) + if (MBB.isLayoutSuccessor(Succ)) continue; - BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI); + BlockFrequency EdgeFreq = + BlockFreq * MBPI->getEdgeProbability(&MBB, Succ); ++NumBranches; BranchTakenFreq += EdgeFreq.getFrequency(); } diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 2960408..87aaaa0 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -47,7 +48,7 @@ namespace { MachineRegisterInfo *MRI; public: static char ID; // Pass identification - MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) { + MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(0), CurrVN(0) { initializeMachineCSEPass(*PassRegistry::getPassRegistry()); } @@ -68,7 +69,7 @@ namespace { } private: - const unsigned LookAheadLimit; + unsigned LookAheadLimit; typedef RecyclingAllocator<BumpPtrAllocator, ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy; typedef ScopedHashTable<MachineInstr*, unsigned, @@ -580,8 +581,15 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Actually perform the elimination. if (DoCSE) { for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) { - MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); - MRI->clearKillFlags(CSEPairs[i].second); + unsigned OldReg = CSEPairs[i].first; + unsigned NewReg = CSEPairs[i].second; + // OldReg may have been unused but is used now, clear the Dead flag + MachineInstr *Def = MRI->getUniqueVRegDef(NewReg); + assert(Def != nullptr && "CSEd register has no unique definition?"); + Def->clearRegisterDeads(NewReg); + // Replace with NewReg and clear kill flags which may be wrong now. + MRI->replaceRegWith(OldReg, NewReg); + MRI->clearKillFlags(NewReg); } // Go through implicit defs of CSMI and MI, if a def is not dead at MI, @@ -708,5 +716,6 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<MachineDominatorTree>(); + LookAheadLimit = TII->getMachineCSELookAheadLimit(); return PerformCSE(DT->getRootNode()); } diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp index 2931258..a4bc77e 100644 --- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp @@ -45,7 +45,7 @@ class MachineCombiner : public MachineFunctionPass { TargetSchedModel TSchedModel; - /// OptSize - True if optimizing for code size. + /// True if optimizing for code size. bool OptSize; public: @@ -109,7 +109,7 @@ MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) { return DefInstr; } -/// getDepth - Computes depth of instructions in vector \InsInstr. +/// Computes depth of instructions in vector \InsInstr. 
///
/// \param InsInstrs is a vector of machine instructions
/// \param InstrIdxForVirtReg is a dense map of virtual register to index
@@ -125,14 +125,13 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
   SmallVector<unsigned, 16> InstrDepth;
   assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
 
-  // Foreach instruction in in the new sequence compute the depth based on the
+  // For each instruction in the new sequence compute the depth based on the
   // operands. Use the trace information when possible. For new operands which
   // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
   for (auto *InstrPtr : InsInstrs) { // for each Use
     unsigned IDepth = 0;
     DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
-    for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) {
-      const MachineOperand &MO = InstrPtr->getOperand(i);
+    for (const MachineOperand &MO : InstrPtr->operands()) {
       // Check for virtual register operand.
       if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
         continue;
@@ -169,8 +168,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
   return InstrDepth[NewRootIdx];
 }
 
-/// getLatency - Computes instruction latency as max of latency of defined
-/// operands
+/// Computes instruction latency as max of latency of defined operands.
 ///
 /// \param Root is a machine instruction that could be replaced by NewRoot.
 /// It is used to compute a more accurate latency information for NewRoot in
@@ -187,8 +185,7 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
   // Check each definition in NewRoot and compute the latency
   unsigned NewRootLatency = 0;
 
-  for (unsigned i = 0, e = NewRoot->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = NewRoot->getOperand(i);
+  for (const MachineOperand &MO : NewRoot->operands()) {
     // Check for virtual register operand.
     if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
       continue;
@@ -211,12 +208,12 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
   return NewRootLatency;
 }
 
-/// preservesCriticalPathlen - True when the new instruction sequence does not
+/// True when the new instruction sequence does not
 /// lengthen the critical path. The DAGCombine code sequence ends in MI
 /// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A
 /// necessary condition for the new sequence to replace the old sequence is that
-/// is cannot lengthen the critical path. This is decided by the formula
-/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)).
+/// it cannot lengthen the critical path. This is decided by the formula
+/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack).
 /// The slack is the number of cycles Root can be delayed before the critical
 /// path becomes longer.
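The inequality documented above is the whole acceptance test: the rewritten sequence may only replace the original one if the new root's depth plus latency does not exceed the old root's depth plus latency plus slack. Restated literally as a toy predicate, with made-up cycle counts for a MUL+ADD to MADD rewrite:

    #include <cstdio>

    // Toy restatement of the acceptance test above; all cycle counts invented.
    static bool fitsCriticalPath(unsigned NewRootDepth, unsigned NewRootLatency,
                                 unsigned RootDepth, unsigned RootLatency,
                                 unsigned RootSlack) {
      return NewRootDepth + NewRootLatency <=
             RootDepth + RootLatency + RootSlack;
    }

    int main() {
      // A MUL feeding an ADD (root depth 3, latency 4, slack 1) rewritten
      // into one fused MADD whose root sits at depth 3 with latency 5:
      // 3 + 5 <= 3 + 4 + 1, so the combined sequence is acceptable.
      std::puts(fitsCriticalPath(3, 5, 3, 4, 1) ? "combine" : "keep original");
    }
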
bool MachineCombiner::preservesCriticalPathLen( @@ -264,8 +261,7 @@ void MachineCombiner::instr2instrSC( InstrsSC.push_back(SC); } } -/// preservesResourceLen - True when the new instructions do not increase -/// resource length +/// True when the new instructions do not increase resource length bool MachineCombiner::preservesResourceLen( MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, @@ -300,7 +296,7 @@ bool MachineCombiner::preservesResourceLen( } /// \returns true when new instruction sequence should be generated -/// independent if it lenghtens critical path or not +/// independent if it lengthens critical path or not bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { if (OptSize && (NewSize < OldSize)) return true; @@ -309,7 +305,7 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { return false; } -/// combineInstructions - substitute a slow code sequence with a faster one by +/// Substitute a slow code sequence with a faster one by /// evaluating instruction combining pattern. /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction /// combining based on machine trace metrics. Only combine a sequence of @@ -370,7 +366,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { continue; // Substitute when we optimize for codesize and the new sequence has // fewer instructions OR - // the new sequence neither lenghten the critical path nor increases + // the new sequence neither lengthens the critical path nor increases // resource pressure. if (doSubstitute(InsInstrs.size(), DelInstrs.size()) || (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, @@ -406,8 +402,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { } bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { - const TargetSubtargetInfo &STI = - MF.getTarget().getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); TII = STI.getInstrInfo(); TRI = STI.getRegisterInfo(); SchedModel = STI.getSchedModel(); @@ -416,8 +411,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { Traces = &getAnalysis<MachineTraceMetrics>(); MinInstr = 0; - OptSize = MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); if (!TII->useMachineCombiner()) { diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 9611122..43c80b7 100644 --- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -252,7 +252,11 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { report_fatal_error("MachineCopyPropagation should be run after" " register allocation!"); - if (MO.isDef()) { + // Treat undef use like defs. + // The backends are allowed to do whatever they want with undef value + // and we cannot be sure this register will not be rewritten to break + // some false dependencies for the hardware for instance. 
+    if (MO.isDef() || MO.isUndef()) {
       Defs.push_back(Reg);
       continue;
     }
   }
diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
index df60cf3..467a2e4 100644
--- a/contrib/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
@@ -14,6 +14,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallBitVector.h"
 
 using namespace llvm;
 
@@ -59,3 +60,68 @@ void MachineDominatorTree::releaseMemory() {
 void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
   DT->print(OS);
 }
+
+void MachineDominatorTree::applySplitCriticalEdges() const {
+  // Bail out early if there is nothing to do.
+  if (CriticalEdgesToSplit.empty())
+    return;
+
+  // For each element in CriticalEdgesToSplit, remember whether or not the
+  // element is the new immediate dominator of its successor. The mapping is
+  // done by index, i.e., the information for the ith element of
+  // CriticalEdgesToSplit is the ith element of IsNewIDom.
+  SmallBitVector IsNewIDom(CriticalEdgesToSplit.size(), true);
+  size_t Idx = 0;
+
+  // Collect all the dominance properties info, before invalidating
+  // the underlying DT.
+  for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+    // Update dominator information.
+    MachineBasicBlock *Succ = Edge.ToBB;
+    MachineDomTreeNode *SuccDTNode = DT->getNode(Succ);
+
+    for (MachineBasicBlock *PredBB : Succ->predecessors()) {
+      if (PredBB == Edge.NewBB)
+        continue;
+      // If we are in this situation:
+      //   FromBB1        FromBB2
+      //    +              +
+      //   + +            + +
+      //  +   +          +   +
+      // ...  Split1  Split2 ...
+      //        +       +
+      //         +     +
+      //          +   +
+      //           Succ
+      // Instead of checking the dominance property with Split2, we check it
+      // with FromBB2 since Split2 is still unknown to the underlying DT
+      // structure.
+      if (NewBBs.count(PredBB)) {
+        assert(PredBB->pred_size() == 1 && "A basic block resulting from a "
+                                           "critical edge split has more "
+                                           "than one predecessor!");
+        PredBB = *PredBB->pred_begin();
+      }
+      if (!DT->dominates(SuccDTNode, DT->getNode(PredBB))) {
+        IsNewIDom[Idx] = false;
+        break;
+      }
+    }
+    ++Idx;
+  }
+
+  // Now, update DT with the collected dominance properties info.
+  Idx = 0;
+  for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+    // We know FromBB dominates NewBB.
+    MachineDomTreeNode *NewDTNode = DT->addNewBlock(Edge.NewBB, Edge.FromBB);
+
+    // If all the other predecessors of "Succ" are dominated by "Succ" itself
+    // then the new block is the new immediate dominator of "Succ". Otherwise,
+    // the new block doesn't dominate anything.
+ if (IsNewIDom[Idx]) + DT->changeImmediateDominator(DT->getNode(Edge.ToBB), NewDTNode); + ++Idx; + } + NewBBs.clear(); + CriticalEdgesToSplit.clear(); +} diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index 6b4cba6..8ec63f8 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -54,7 +54,7 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, unsigned FunctionNum, MachineModuleInfo &mmi) - : Fn(F), Target(TM), STI(TM.getSubtargetImpl()), Ctx(mmi.getContext()), + : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()), MMI(mmi) { if (STI->getRegisterInfo()) RegInfo = new (Allocator) MachineRegisterInfo(this); @@ -67,17 +67,14 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, STI->getFrameLowering()->isStackRealignable(), !F->hasFnAttribute("no-realign-stack")); - if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackAlignment)) - FrameInfo->ensureMaxAlignment(Fn->getAttributes(). - getStackAlignment(AttributeSet::FunctionIndex)); + if (Fn->hasFnAttribute(Attribute::StackAlignment)) + FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment()); ConstantPool = new (Allocator) MachineConstantPool(TM); Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. - if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize)) + if (!Fn->hasFnAttribute(Attribute::OptimizeForSize)) Alignment = std::max(Alignment, STI->getTargetLowering()->getPrefFunctionAlignment()); @@ -383,7 +380,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const MachineFunction *F) { - return "CFG for '" + F->getName().str() + "' function"; + return ("CFG for '" + F->getName() + "' function").str(); } std::string getNodeLabel(const MachineBasicBlock *Node, @@ -462,7 +459,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, /// normal 'L' label is returned. MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { - const DataLayout *DL = getSubtarget().getDataLayout(); + const DataLayout *DL = getTarget().getDataLayout(); assert(JumpTableInfo && "No jump tables"); assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); @@ -471,14 +468,14 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, SmallString<60> Name; raw_svector_ostream(Name) << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; - return Ctx.GetOrCreateSymbol(Name.str()); + return Ctx.getOrCreateSymbol(Name); } /// getPICBaseSymbol - Return a function-local symbol to represent the PIC /// base. MCSymbol *MachineFunction::getPICBaseSymbol() const { - const DataLayout *DL = getSubtarget().getDataLayout(); - return Ctx.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ + const DataLayout *DL = getTarget().getDataLayout(); + return Ctx.getOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ Twine(getFunctionNumber())+"$pb"); } @@ -587,21 +584,12 @@ int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, return -++NumFixedObjects; } -int MachineFrameInfo::CreateFrameAllocation(uint64_t Size) { - // Force the use of a frame pointer. 
The intention is that this intrinsic be - // used in conjunction with unwind mechanisms that leak the frame pointer. - setFrameAddressIsTaken(true); - Size = RoundUpToAlignment(Size, StackAlignment); - return CreateStackObject(Size, StackAlignment, false); -} - BitVector MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { assert(MBB && "MBB must be valid"); const MachineFunction *MF = MBB->getParent(); assert(MF && "MBB must be part of a MachineFunction"); - const TargetMachine &TM = MF->getTarget(); - const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); BitVector BV(TRI->getNumRegs()); // Before CSI is calculated, no registers are considered pristine. They can be @@ -612,8 +600,8 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) BV.set(*CSR); - // The entry MBB always has all CSRs pristine. - if (MBB == &MF->front()) + // Each MBB before the save point has all CSRs pristine. + if (isBeforeSavePoint(*MF, *MBB)) return BV; // On other MBBs the saved CSRs are not pristine. @@ -625,6 +613,40 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { return BV; } +// Note: We could use some sort of caching mechanism, but we lack the ability +// to know when the cache is invalid, i.e., the CFG changed. +// Assuming we had that, we could simply compute the set of all MBBs +// that are before the save point. +bool MachineFrameInfo::isBeforeSavePoint(const MachineFunction &MF, + const MachineBasicBlock &MBB) const { + // Early exit if shrink-wrapping did not kick in. + if (!Save) + return &MBB == &MF.front(); + + // Starting from MBB, check if there is a path leading to Save that does + // not cross Restore. + SmallPtrSet<const MachineBasicBlock *, 8> Visited; + SmallVector<const MachineBasicBlock *, 8> WorkList; + WorkList.push_back(&MBB); + Visited.insert(&MBB); + do { + const MachineBasicBlock *CurBB = WorkList.pop_back_val(); + // By construction, the region that is after the save point is + // dominated by the Save and post-dominated by the Restore. + // If we do not reach Restore and still reach Save, this + // means MBB is before Save. + if (CurBB == Save) + return true; + if (CurBB == Restore) + continue; + // Enqueue all the successors not already visited.
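The reachability test this function performs is a plain worklist walk that treats Restore as a barrier. A self-contained sketch, with a hypothetical BB type in place of MachineBasicBlock:

    #include <unordered_set>
    #include <vector>

    struct BB { std::vector<BB *> Succs; }; // stand-in for MachineBasicBlock

    // Returns true if some path from Start reaches Save without first
    // passing through Restore, i.e. Start is "before" the save point.
    bool reachesSaveBeforeRestore(BB *Start, BB *Save, BB *Restore) {
      std::unordered_set<BB *> Visited{Start};
      std::vector<BB *> WorkList{Start};
      while (!WorkList.empty()) {
        BB *Cur = WorkList.back();
        WorkList.pop_back();
        if (Cur == Save)
          return true;
        if (Cur == Restore)
          continue; // Paths through Restore are cut off.
        for (BB *Succ : Cur->Succs)
          if (Visited.insert(Succ).second)
            WorkList.push_back(Succ);
      }
      return false;
    }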
+ for (MachineBasicBlock *SuccBB : CurBB->successors()) + if (Visited.insert(SuccBB).second) + WorkList.push_back(SuccBB); + } while (!WorkList.empty()); + return false; +} + unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); @@ -821,7 +843,7 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } void MachineConstantPoolValue::anchor() { } const DataLayout *MachineConstantPool::getDataLayout() const { - return TM.getSubtargetImpl()->getDataLayout(); + return TM.getDataLayout(); } Type *MachineConstantPoolEntry::getType() const { @@ -843,13 +865,13 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { switch (getRelocationInfo()) { default: llvm_unreachable("Unknown section kind"); - case 2: + case Constant::GlobalRelocations: Kind = SectionKind::getReadOnlyWithRel(); break; - case 1: + case Constant::LocalRelocation: Kind = SectionKind::getReadOnlyWithRelLocal(); break; - case 0: + case Constant::NoRelocation: switch (DL->getTypeAllocSize(getType())) { case 4: Kind = SectionKind::getMergeableConst4(); @@ -861,7 +883,7 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { Kind = SectionKind::getMergeableConst16(); break; default: - Kind = SectionKind::getMergeableConst(); + Kind = SectionKind::getReadOnly(); break; } } @@ -907,16 +929,16 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, // DataLayout. if (isa<PointerType>(A->getType())) A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, - const_cast<Constant*>(A), TD); + const_cast<Constant *>(A), *TD); else if (A->getType() != IntTy) A = ConstantFoldInstOperands(Instruction::BitCast, IntTy, - const_cast<Constant*>(A), TD); + const_cast<Constant *>(A), *TD); if (isa<PointerType>(B->getType())) B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, - const_cast<Constant*>(B), TD); + const_cast<Constant *>(B), *TD); else if (B->getType() != IntTy) B = ConstantFoldInstOperands(Instruction::BitCast, IntTy, - const_cast<Constant*>(B), TD); + const_cast<Constant *>(B), *TD); return A == B; } diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp index 2f076b6..aaf06a7 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -16,7 +16,6 @@ #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -54,7 +53,7 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominanceFrontier>(); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<IVUsers>(); - AU.addPreserved<LoopInfo>(); + AU.addPreserved<LoopInfoWrapperPass>(); AU.addPreserved<MemoryDependenceAnalysis>(); AU.addPreserved<ScalarEvolution>(); AU.addPreserved<StackProtector>(); diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 968ec2c..205032f 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -141,6 +141,28 @@ void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) { Contents.CFP = FPImm; } +void 
MachineOperand::ChangeToES(const char *SymName, unsigned char TargetFlags) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into an external symbol"); + + removeRegFromUses(); + + OpKind = MO_ExternalSymbol; + Contents.OffsetedInfo.Val.SymbolName = SymName; + setOffset(0); // Offset is always 0. + setTargetFlags(TargetFlags); +} + +void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into an MCSymbol"); + + removeRegFromUses(); + + OpKind = MO_MCSymbol; + Contents.Sym = Sym; +} + /// ChangeToRegister - Replace this operand with a new register operand of /// the specified value. If an operand is known to be a register already, /// the setReg method should be used. @@ -276,17 +298,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) { /// print - Print the specified machine operand. /// -void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { - // If the instruction is embedded into a basic block, we can find the - // target info for the instruction. - if (!TM) - if (const MachineInstr *MI = getParent()) - if (const MachineBasicBlock *MBB = MI->getParent()) - if (const MachineFunction *MF = MBB->getParent()) - TM = &MF->getTarget(); - const TargetRegisterInfo *TRI = - TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr; - +void MachineOperand::print(raw_ostream &OS, + const TargetRegisterInfo *TRI) const { switch (getType()) { case MachineOperand::MO_Register: OS << PrintReg(getReg(), TRI, getSubReg()); @@ -308,8 +321,8 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { if (isUndef() && getSubReg()) OS << ",read-undef"; } else if (isImplicit()) { - OS << "imp-use"; - NeedComma = true; + OS << "imp-use"; + NeedComma = true; } if (isKill()) { @@ -595,10 +608,10 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { /// implicit operands. It reserves space for the number of operands specified by /// the MCInstrDesc. MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, - const DebugLoc dl, bool NoImp) - : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), - Flags(0), AsmPrinterFlags(0), - NumMemRefs(0), MemRefs(nullptr), debugLoc(dl) { + DebugLoc dl, bool NoImp) + : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), + AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr), + debugLoc(std::move(dl)) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); // Reserve space for the expected number of operands. @@ -625,8 +638,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) Operands = MF.allocateOperandArray(CapOperands); // Copy operands. - for (unsigned i = 0; i != MI.getNumOperands(); ++i) - addOperand(MF, MI.getOperand(i)); + for (const MachineOperand &MO : MI.operands()) + addOperand(MF, MO); // Copy all the sensible flags. setFlags(MI.Flags); @@ -645,18 +658,18 @@ MachineRegisterInfo *MachineInstr::getRegInfo() { /// this instruction from their respective use lists. This requires that the /// operands already be on their use lists.
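ChangeToES and ChangeToMCSymbol above follow one pattern for retagging a union-like operand: assert the operand is not a tied register, detach any register bookkeeping, then overwrite the tag and payload. A toy model with a hypothetical Operand type (the real MachineOperand must also unlink itself from register use lists, which removeRegFromUses() handles):

    #include <cassert>
    #include <cstdint>

    // Hypothetical miniature of a tagged-union operand.
    struct Operand {
      enum Kind : std::uint8_t { Register, ExternalSymbol } OpKind = Register;
      bool Tied = false;
      union {
        unsigned Reg;
        const char *SymbolName;
      };

      void changeToES(const char *Name) {
        // Retagging a tied register operand would corrupt the tie
        // bookkeeping, so it is forbidden, as in the assertion above.
        assert((OpKind != Register || !Tied) &&
               "Cannot change a tied operand into an external symbol");
        OpKind = ExternalSymbol;
        SymbolName = Name;
      }
    };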
void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (Operands[i].isReg()) - MRI.removeRegOperandFromUseList(&Operands[i]); + for (MachineOperand &MO : operands()) + if (MO.isReg()) + MRI.removeRegOperandFromUseList(&MO); } /// AddRegOperandsToUseLists - Add all of the register operands in /// this instruction to their respective use lists. This requires that the /// operands not be on their use lists yet. void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (Operands[i].isReg()) - MRI.addRegOperandToUseList(&Operands[i]); + for (MachineOperand &MO : operands()) + if (MO.isReg()) + MRI.addRegOperandToUseList(&MO); } void MachineInstr::addOperand(const MachineOperand &Op) { @@ -674,14 +687,8 @@ static void moveOperands(MachineOperand *Dst, MachineOperand *Src, if (MRI) return MRI->moveOperands(Dst, Src, NumOps); - // Here it would be convenient to call memmove, so that isn't allowed because - // MachineOperand has a constructor and so isn't a POD type. - if (Dst < Src) - for (unsigned i = 0; i != NumOps; ++i) - new (Dst + i) MachineOperand(Src[i]); - else - for (unsigned i = NumOps; i ; --i) - new (Dst + i - 1) MachineOperand(Src[i - 1]); + // MachineOperand is a trivially copyable type so we can just use memmove. + std::memmove(Dst, Src, NumOps * sizeof(MachineOperand)); } /// addOperand - Add the specified operand to the instruction. If it is an @@ -896,8 +903,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, } // If DebugLoc does not match then two dbg.values are not identical. if (isDebugValue()) - if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown() - && getDebugLoc() != Other->getDebugLoc()) + if (getDebugLoc() && Other->getDebugLoc() && + getDebugLoc() != Other->getDebugLoc()) return false; return true; } @@ -926,8 +933,7 @@ void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() { MachineInstr *MI = (MachineInstr *)this; MachineRegisterInfo &MRI = MF->getRegInfo(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); @@ -1330,8 +1336,7 @@ unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const { /// clearKillInfo - Clears kill flags on all operands. /// void MachineInstr::clearKillInfo() { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - MachineOperand &MO = getOperand(i); + for (MachineOperand &MO : operands()) { if (MO.isReg() && MO.isUse()) MO.setIsKill(false); } @@ -1344,15 +1349,13 @@ void MachineInstr::substituteRegister(unsigned FromReg, if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { if (SubIdx) ToReg = RegInfo.getSubReg(ToReg, SubIdx); - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - MachineOperand &MO = getOperand(i); + for (MachineOperand &MO : operands()) { if (!MO.isReg() || MO.getReg() != FromReg) continue; MO.substPhysReg(ToReg, RegInfo); } } else { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - MachineOperand &MO = getOperand(i); + for (MachineOperand &MO : operands()) { if (!MO.isReg() || MO.getReg() != FromReg) continue; MO.substVirtReg(ToReg, SubIdx, RegInfo); @@ -1363,9 +1366,7 @@ /// isSafeToMove - Return true if it is safe to move this instruction.
If /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. -bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, - AliasAnalysis *AA, - bool &SawStore) const { +bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. // // Treat volatile loads as stores. This is not strictly necessary for @@ -1495,8 +1496,7 @@ bool MachineInstr::hasUnmodeledSideEffects() const { /// allDefsAreDead - Return true if all the defs of this instruction are dead. /// bool MachineInstr::allDefsAreDead() const { - for (unsigned i = 0, e = getNumOperands(); i < e; ++i) { - const MachineOperand &MO = getOperand(i); + for (const MachineOperand &MO : operands()) { if (!MO.isReg() || MO.isUse()) continue; if (!MO.isDead()) @@ -1523,23 +1523,19 @@ void MachineInstr::dump() const { #endif } -static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, - raw_ostream &CommentOS) { - const LLVMContext &Ctx = MF->getFunction()->getContext(); - DL.print(Ctx, CommentOS); -} - -void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, - bool SkipOpers) const { - // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. +void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const { + // We can be a bit tidier if we know the MachineFunction. const MachineFunction *MF = nullptr; + const TargetRegisterInfo *TRI = nullptr; const MachineRegisterInfo *MRI = nullptr; + const TargetInstrInfo *TII = nullptr; if (const MachineBasicBlock *MBB = getParent()) { MF = MBB->getParent(); - if (!TM && MF) - TM = &MF->getTarget(); - if (MF) + if (MF) { MRI = &MF->getRegInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); + } } // Save a list of virtual registers. @@ -1552,7 +1548,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, !getOperand(StartOp).isImplicit(); ++StartOp) { if (StartOp != 0) OS << ", "; - getOperand(StartOp).print(OS, TM); + getOperand(StartOp).print(OS, TRI); unsigned Reg = getOperand(StartOp).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) VirtRegs.push_back(Reg); @@ -1562,8 +1558,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, OS << " = "; // Print the opcode name. - if (TM && TM->getSubtargetImpl()->getInstrInfo()) - OS << TM->getSubtargetImpl()->getInstrInfo()->getName(getOpcode()); + if (TII) + OS << TII->getName(getOpcode()); else OS << "UNKNOWN"; @@ -1579,7 +1575,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) { // Print asm string. 
OS << " "; - getOperand(InlineAsm::MIOp_AsmString).print(OS, TM); + getOperand(InlineAsm::MIOp_AsmString).print(OS, TRI); // Print HasSideEffects, MayLoad, MayStore, IsAlignStack unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); @@ -1617,9 +1613,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (TargetRegisterInfo::isPhysicalRegister(Reg)) { if (MRI->use_empty(Reg)) { bool HasAliasLive = false; - for (MCRegAliasIterator AI( - Reg, TM->getSubtargetImpl()->getRegisterInfo(), true); - AI.isValid(); ++AI) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { unsigned AliasReg = *AI; if (!MRI->use_empty(AliasReg)) { HasAliasLive = true; @@ -1645,17 +1639,13 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, } if (isDebugValue() && MO.isMetadata()) { // Pretty print DBG_VALUE instructions. - const MDNode *MD = MO.getMetadata(); - DIDescriptor DI(MD); - DIVariable DIV(MD); - - if (DI.isVariable() && !DIV.getName().empty()) - OS << "!\"" << DIV.getName() << '\"'; + auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata()); + if (DIV && !DIV->getName().empty()) + OS << "!\"" << DIV->getName() << '\"'; else - MO.print(OS, TM); - } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { - OS << TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIndexName( - MO.getImm()); + MO.print(OS, TRI); + } else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { + OS << TRI->getSubRegIndexName(MO.getImm()); } else if (i == AsmDescOp && MO.isImm()) { // Pretty print the inline asm operand descriptor. OS << '$' << AsmOpCount++; @@ -1672,11 +1662,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, unsigned RCID = 0; if (InlineAsm::hasRegClassConstraint(Flag, RCID)) { - if (TM) { - const TargetRegisterInfo *TRI = - TM->getSubtargetImpl()->getRegisterInfo(); - OS << ':' - << TRI->getRegClassName(TRI->getRegClass(RCID)); + if (TRI) { + OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID)); } else OS << ":RC" << RCID; } @@ -1690,7 +1677,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, // Compute the index of the next operand descriptor. AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag); } else - MO.print(OS, TM); + MO.print(OS, TRI); } // Briefly indicate whether any call clobbers were omitted. @@ -1726,7 +1713,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (!HaveSemi) OS << ";"; HaveSemi = true; for (unsigned i = 0; i != VirtRegs.size(); ++i) { const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]); - OS << " " << MRI->getTargetRegisterInfo()->getRegClassName(RC) + OS << " " << TRI->getRegClassName(RC) << ':' << PrintReg(VirtRegs[i]); for (unsigned j = i+1; j != VirtRegs.size();) { if (MRI->getRegClass(VirtRegs[j]) != RC) { @@ -1741,24 +1728,24 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, } // Print debug location information. 
- if (isDebugValue() && getOperand(e - 1).isMetadata()) { + if (isDebugValue() && getOperand(e - 2).isMetadata()) { if (!HaveSemi) OS << ";"; - DIVariable DV(getOperand(e - 1).getMetadata()); - OS << " line no:" << DV.getLineNumber(); - if (MDNode *InlinedAt = DV.getInlinedAt()) { - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); - if (!InlinedAtDL.isUnknown() && MF) { + auto *DV = cast<DILocalVariable>(getOperand(e - 2).getMetadata()); + OS << " line no:" << DV->getLine(); + if (auto *InlinedAt = debugLoc->getInlinedAt()) { + DebugLoc InlinedAtDL(InlinedAt); + if (InlinedAtDL && MF) { OS << " inlined @[ "; - printDebugLoc(InlinedAtDL, MF, OS); + InlinedAtDL.print(OS); OS << " ]"; } } if (isIndirectDebugValue()) OS << " indirect"; - } else if (!debugLoc.isUnknown() && MF) { + } else if (debugLoc && MF) { if (!HaveSemi) OS << ";"; OS << " dbg:"; - printDebugLoc(debugLoc, MF, OS); + debugLoc.print(OS); } OS << '\n'; @@ -1827,8 +1814,7 @@ void MachineInstr::clearRegisterKills(unsigned Reg, const TargetRegisterInfo *RegInfo) { if (!TargetRegisterInfo::isPhysicalRegister(Reg)) RegInfo = nullptr; - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - MachineOperand &MO = getOperand(i); + for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) continue; unsigned OpReg = MO.getReg(); @@ -1889,6 +1875,22 @@ bool MachineInstr::addRegisterDead(unsigned Reg, return true; } +void MachineInstr::clearRegisterDeads(unsigned Reg) { + for (MachineOperand &MO : operands()) { + if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) + continue; + MO.setIsDead(false); + } +} + +void MachineInstr::addRegisterDefReadUndef(unsigned Reg) { + for (MachineOperand &MO : operands()) { + if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0) + continue; + MO.setIsUndef(); + } +} + void MachineInstr::addRegisterDefined(unsigned Reg, const TargetRegisterInfo *RegInfo) { if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -1896,8 +1898,7 @@ void MachineInstr::addRegisterDefined(unsigned Reg, if (MO) return; } else { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - const MachineOperand &MO = getOperand(i); + for (const MachineOperand &MO : operands()) { if (MO.isReg() && MO.getReg() == Reg && MO.isDef() && MO.getSubReg() == 0) return; @@ -1911,8 +1912,7 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, const TargetRegisterInfo &TRI) { bool HasRegMask = false; - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - MachineOperand &MO = getOperand(i); + for (MachineOperand &MO : operands()) { if (MO.isRegMask()) { HasRegMask = true; continue; } if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - bool Dead = true; - for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end(); - I != E; ++I) - if (TRI.regsOverlap(*I, Reg)) { - Dead = false; - break; - } // If there are no uses, including partial uses, the def is dead. - if (Dead) MO.setIsDead(); + if (std::none_of(UsedRegs.begin(), UsedRegs.end(), + [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) + MO.setIsDead(); } // This is a call with a register mask operand.
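The rewritten dead-def test in setPhysRegsDeadExcept reduces to a single std::none_of over the used registers. A standalone equivalent, with the overlap check supplied as a plain function pointer since the real predicate lives on TargetRegisterInfo:

    #include <algorithm>
    #include <vector>

    // A physical-register def is dead exactly when no used register
    // overlaps it; mirrors the std::none_of form used above.
    bool defIsDead(unsigned Reg, const std::vector<unsigned> &UsedRegs,
                   bool (*RegsOverlap)(unsigned, unsigned)) {
      return std::none_of(UsedRegs.begin(), UsedRegs.end(),
                          [&](unsigned Use) { return RegsOverlap(Use, Reg); });
    }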
@@ -1945,8 +1940,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { SmallVector<size_t, 8> HashComponents; HashComponents.reserve(MI->getNumOperands() + 1); HashComponents.push_back(MI->getOpcode()); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (MO.isReg() && MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; // Skip virtual register defs. diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index cb14a5c..3967a2f 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -10,10 +10,6 @@ // This pass performs loop invariant code motion on machine instructions. We // attempt to remove as much code from the body of a loop as possible. // -// This pass does not attempt to throttle itself to limit register pressure. -// The register allocation phases are expected to perform rematerialization -// to recover when register pressure is high. -// // This pass is not intended to be a replacement or a complete alternative // for the LLVM-IR-level LICM pass. It is only designed to hoist simple // constructs that are not exposed before lowering and instruction selection. @@ -54,6 +50,12 @@ HoistCheapInsts("hoist-cheap-insts", cl::desc("MachineLICM should hoist even cheap instructions"), cl::init(false), cl::Hidden); +static cl::opt<bool> +SinkInstsToAvoidSpills("sink-insts-to-avoid-spills", + cl::desc("MachineLICM should sink instructions into " + "loops to avoid register spills"), + cl::init(false), cl::Hidden); + STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); STATISTIC(NumLowRP, @@ -98,7 +100,7 @@ namespace { SmallSet<unsigned, 32> RegSeen; SmallVector<unsigned, 8> RegPressure; - // Register pressure "limit" per register class. If the pressure + // Register pressure "limit" per register pressure set. If the pressure // is higher than the limit, then it's considered high. SmallVector<unsigned, 8> RegLimit; @@ -208,7 +210,8 @@ namespace { /// CanCauseHighRegPressure - Visit BBs from header to current BB, /// check if hoisting an instruction of the given cost matrix can cause high /// register pressure. - bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap); + bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost, + bool Cheap); /// UpdateBackTraceRegPressure - Traverse the back trace from header to /// the current block and update their register pressures to reflect the @@ -243,21 +246,30 @@ namespace { void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); void HoistRegion(MachineDomTreeNode *N, bool IsHeader); - /// getRegisterClassIDAndCost - For a given MI, register, and the operand - /// index, return the ID and cost of its representative register class by - /// reference. - void getRegisterClassIDAndCost(const MachineInstr *MI, - unsigned Reg, unsigned OpIdx, - unsigned &RCId, unsigned &RCCost) const; + /// SinkIntoLoop - Sink instructions into loops if profitable. This + /// especially tries to prevent register spills caused by register pressure + /// if there is little to no overhead moving instructions into loops. + void SinkIntoLoop(); /// InitRegPressure - Find all virtual register references that are liveout /// of the preheader to initialize the starting "register pressure". Note /// this does not count live through (livein but not used) registers. 
void InitRegPressure(MachineBasicBlock *BB); + /// calcRegisterCost - Calculate the additional register pressure that the + /// registers used in MI cause. + /// + /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to + /// figure out which usages are live-ins. + /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. + DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI, + bool ConsiderSeen, + bool ConsiderUnseenAsDef); + /// UpdateRegPressure - Update estimate of register pressure after the /// specified instruction. - void UpdateRegPressure(const MachineInstr *MI); + void UpdateRegPressure(const MachineInstr *MI, + bool ConsiderUnseenAsDef = false); /// ExtractHoistableLoad - Unfold a load from the given machineinstr if /// the load itself could be hoisted. Return the unfolded and hoistable @@ -343,13 +355,12 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { if (PreRegAlloc) { // Estimate register pressure during pre-regalloc pass. - unsigned NumRC = TRI->getNumRegClasses(); - RegPressure.resize(NumRC); + unsigned NumRPS = TRI->getNumRegPressureSets(); + RegPressure.resize(NumRPS); std::fill(RegPressure.begin(), RegPressure.end(), 0); - RegLimit.resize(NumRC); - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF); + RegLimit.resize(NumRPS); + for (unsigned i = 0, e = NumRPS; i != e; ++i) + RegLimit[i] = TRI->getRegPressureSetLimit(MF, i); } // Get our Loop information... @@ -381,6 +392,9 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { FirstInLoop = true; HoistOutOfLoop(N); CSEMap.clear(); + + if (SinkInstsToAvoidSpills) + SinkIntoLoop(); } } @@ -693,6 +707,10 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, /// one pass without iteration. /// void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + SmallVector<MachineDomTreeNode*, 32> Scopes; SmallVector<MachineDomTreeNode*, 8> WorkList; DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; @@ -700,7 +718,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { // Perform a DFS walk to determine the order of visit. WorkList.push_back(HeaderN); - do { + while (!WorkList.empty()) { MachineDomTreeNode *Node = WorkList.pop_back_val(); assert(Node && "Null dominator tree node?"); MachineBasicBlock *BB = Node->getBlock(); @@ -734,28 +752,21 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { ParentMap[Child] = Node; WorkList.push_back(Child); } - } while (!WorkList.empty()); + } - if (Scopes.size() != 0) { - MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) - return; + if (Scopes.size() == 0) + return; - // Compute registers which are livein into the loop headers. - RegSeen.clear(); - BackTrace.clear(); - InitRegPressure(Preheader); - } + // Compute registers which are livein into the loop headers. + RegSeen.clear(); + BackTrace.clear(); + InitRegPressure(Preheader); // Now perform LICM. 
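The scope collection in HoistOutOfLoop above is a plain preorder walk of the dominator tree with an explicit worklist (the real code additionally prunes blocks it will never hoist from). Stripped to its traversal shape, with a hypothetical Node type standing in for MachineDomTreeNode:

    #include <vector>

    struct Node { std::vector<Node *> Children; }; // stand-in for MachineDomTreeNode

    // Explicit-stack preorder DFS: visit a node, then queue its children.
    std::vector<Node *> preorderScopes(Node *Root) {
      std::vector<Node *> Scopes;
      std::vector<Node *> WorkList{Root};
      while (!WorkList.empty()) {
        Node *N = WorkList.back();
        WorkList.pop_back();
        Scopes.push_back(N);
        for (Node *Child : N->Children)
          WorkList.push_back(Child);
      }
      return Scopes;
    }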
for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { MachineDomTreeNode *Node = Scopes[i]; MachineBasicBlock *MBB = Node->getBlock(); - MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) - continue; - EnterScope(MBB); // Process the block @@ -774,27 +785,57 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { } } -static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { - return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); -} +void MachineLICM::SinkIntoLoop() { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + + SmallVector<MachineInstr *, 8> Candidates; + for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin(); + I != Preheader->instr_end(); ++I) { + // We need to ensure that we can safely move this instruction into the loop. + // As such, it must not have side-effects, such as those a call would have. + if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I)) + Candidates.push_back(I); + } -/// getRegisterClassIDAndCost - For a given MI, register, and the operand -/// index, return the ID and cost of its representative register class. -void -MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, - unsigned Reg, unsigned OpIdx, - unsigned &RCId, unsigned &RCCost) const { - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - MVT VT = *RC->vt_begin(); - if (VT == MVT::Untyped) { - RCId = RC->getID(); - RCCost = 1; - } else { - RCId = TLI->getRepRegClassFor(VT)->getID(); - RCCost = TLI->getRepRegClassCostFor(VT); + for (MachineInstr *I : Candidates) { + const MachineOperand &MO = I->getOperand(0); + if (!MO.isDef() || !MO.isReg() || !MO.getReg()) + continue; + if (!MRI->hasOneDef(MO.getReg())) + continue; + bool CanSink = true; + MachineBasicBlock *B = nullptr; + for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { + // FIXME: Come up with a proper cost model that estimates whether sinking + // the instruction (and thus possibly executing it on every loop + // iteration) is more expensive than a register. + // For now, assume that copies are cheap and thus almost always worth it. + if (!MI.isCopy()) { + CanSink = false; + break; + } + if (!B) { + B = MI.getParent(); + continue; + } + B = DT->findNearestCommonDominator(B, MI.getParent()); + if (!B) { + CanSink = false; + break; + } + } + if (!CanSink || !B || B == Preheader) + continue; + B->splice(B->getFirstNonPHI(), Preheader, I); + } } +static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { + return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); +} + /// InitRegPressure - Find all virtual register references that are liveout of /// the preheader to initialize the starting "register pressure". Note this /// does not count live through (livein but not used) registers.
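SinkIntoLoop above places each candidate at the nearest common dominator of all its uses. The classic idom-chain walk behind DT->findNearestCommonDominator can be sketched over a hypothetical Block carrying an immediate-dominator link and tree depth; this assumes both blocks sit in one dominator tree rooted at the function entry:

    struct Block {
      Block *IDom = nullptr; // immediate dominator; nullptr at the entry block
      unsigned Depth = 0;    // depth in the dominator tree
    };

    // Walk the deeper block up its idom chain until both pointers meet.
    Block *nearestCommonDominator(Block *A, Block *B) {
      while (A != B) {
        if (A->Depth >= B->Depth)
          A = A->IDom;
        else
          B = B->IDom;
      }
      return A;
    }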
@@ -812,41 +853,30 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { InitRegPressure(*BB->pred_begin()); } - for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); - MII != E; ++MII) { - MachineInstr *MI = &*MII; - for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - bool isNew = RegSeen.insert(Reg).second; - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); - if (MO.isDef()) - RegPressure[RCId] += RCCost; - else { - bool isKill = isOperandKill(MO, MRI); - if (isNew && !isKill) - // Haven't seen this, it must be a livein. - RegPressure[RCId] += RCCost; - else if (!isNew && isKill) - RegPressure[RCId] -= RCCost; - } - } - } + for (const MachineInstr &MI : *BB) + UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true); } /// UpdateRegPressure - Update estimate of register pressure after the /// specified instruction. -void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { - if (MI->isImplicitDef()) - return; +void MachineLICM::UpdateRegPressure(const MachineInstr *MI, + bool ConsiderUnseenAsDef) { + auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef); + for (const auto &RPIdAndCost : Cost) { + unsigned Class = RPIdAndCost.first; + if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second) + RegPressure[Class] = 0; + else + RegPressure[Class] += RPIdAndCost.second; + } +} - SmallVector<unsigned, 4> Defs; +DenseMap<unsigned, int> +MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, + bool ConsiderUnseenAsDef) { + DenseMap<unsigned, int> Cost; + if (MI->isImplicitDef()) + return Cost; for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isImplicit()) @@ -855,27 +885,33 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - bool isNew = RegSeen.insert(Reg).second; + // FIXME: It seems bad to use RegSeen only for some of these calculations. + bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false; + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + + RegClassWeight W = TRI->getRegClassWeight(RC); + int RCCost = 0; if (MO.isDef()) - Defs.push_back(Reg); - else if (!isNew && isOperandKill(MO, MRI)) { - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); - if (RCCost > RegPressure[RCId]) - RegPressure[RCId] = 0; + RCCost = W.RegWeight; + else { + bool isKill = isOperandKill(MO, MRI); + if (isNew && !isKill && ConsiderUnseenAsDef) + // Haven't seen this, it must be a livein. 
+ RCCost = W.RegWeight; + else if (!isNew && isKill) + RCCost = -W.RegWeight; + } + if (RCCost == 0) + continue; + const int *PS = TRI->getRegClassPressureSets(RC); + for (; *PS != -1; ++PS) { + if (Cost.find(*PS) == Cost.end()) + Cost[*PS] = RCCost; else - RegPressure[RCId] -= RCCost; + Cost[*PS] += RCCost; } } - - unsigned Idx = 0; - while (!Defs.empty()) { - unsigned Reg = Defs.pop_back_val(); - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost); - RegPressure[RCId] += RCCost; - ++Idx; - } + return Cost; } /// isLoadFromGOTOrConstantPool - Return true if this machine instruction @@ -898,7 +934,7 @@ static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // Check if it's safe to move the instruction. bool DontMoveAcrossStore = true; - if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) + if (!I.isSafeToMove(AA, DontMoveAcrossStore)) return false; // If it is a load then check if it is guaranteed to execute by making sure that @@ -1067,27 +1103,23 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { /// CanCauseHighRegPressure - Visit BBs from header to current BB, check /// if hoisting an instruction of the given cost matrix can cause high /// register pressure. -bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, +bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, bool CheapInstr) { - for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); - CI != CE; ++CI) { - if (CI->second <= 0) + for (const auto &RPIdAndCost : Cost) { + if (RPIdAndCost.second <= 0) continue; - unsigned RCId = CI->first; - unsigned Limit = RegLimit[RCId]; - int Cost = CI->second; + unsigned Class = RPIdAndCost.first; + int Limit = RegLimit[Class]; // Don't hoist cheap instructions if they would increase register pressure, // even if we're under the limit. if (CheapInstr && !HoistCheapInsts) return true; - for (unsigned i = BackTrace.size(); i != 0; --i) { - SmallVectorImpl<unsigned> &RP = BackTrace[i-1]; - if (RP[RCId] + Cost >= Limit) + for (const auto &RP : BackTrace) + if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit) return true; - } } return false; @@ -1097,46 +1129,15 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, /// current block and update their register pressures to reflect the effect /// of hoisting MI from the current block to the preheader. void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { - if (MI->isImplicitDef()) - return; - // First compute the 'cost' of the instruction, i.e. its contribution // to register pressure.
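The per-instruction cost map built by calcRegisterCost folds each register-class weight into every pressure set that class belongs to. A condensed standalone version, with std::map standing in for DenseMap and a -1 sentinel terminating each pressure-set list as in the real TRI tables:

    #include <map>
    #include <utility>
    #include <vector>

    // Each (pressure-set list, signed weight) pair contributes its weight
    // to every pressure set in its list; lists end with a -1 sentinel.
    std::map<int, int>
    accumulateCost(const std::vector<std::pair<const int *, int>> &Contributions) {
      std::map<int, int> Cost;
      for (const auto &C : Contributions)
        for (const int *PS = C.first; *PS != -1; ++PS)
          Cost[*PS] += C.second;
      return Cost;
    }

Defs contribute a positive weight and killed uses a negative one, so the map nets out the instruction's pressure delta per pressure set.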
- DenseMap<unsigned, int> Cost; - for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); - if (MO.isDef()) { - DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second += RCCost; - else - Cost.insert(std::make_pair(RCId, RCCost)); - } else if (isOperandKill(MO, MRI)) { - DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second -= RCCost; - else - Cost.insert(std::make_pair(RCId, -RCCost)); - } - } + auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false, + /*ConsiderUnseenAsDef=*/false); // Update register pressure of blocks from loop header to current block. - for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { - SmallVectorImpl<unsigned> &RP = BackTrace[i]; - for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); - CI != CE; ++CI) { - unsigned RCId = CI->first; - RP[RCId] += CI->second; - } - } + for (auto &RP : BackTrace) + for (const auto &RPIdAndCost : Cost) + RP[RPIdAndCost.first] += RPIdAndCost.second; } /// IsProfitableToHoist - Return true if it is potentially profitable to hoist @@ -1171,15 +1172,8 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (TII->isTriviallyReMaterializable(&MI, AA)) return true; - // Estimate register pressure to determine whether to LICM the instruction. - // In low register pressure situation, we can be more aggressive about - // hoisting. Also, favors hoisting long latency instructions even in - // moderately high pressure situation. - // Cheap instructions will only be hoisted if they don't increase register - // pressure at all. // FIXME: If there are long latency loop-invariant instructions inside the // loop at this point, why didn't the optimizer's LICM hoist them? - DenseMap<unsigned, int> Cost; for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isImplicit()) @@ -1187,24 +1181,22 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - - unsigned RCId, RCCost; - getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); - if (MO.isDef()) { - if (HasHighOperandLatency(MI, i, Reg)) { - DEBUG(dbgs() << "Hoist High Latency: " << MI); - ++NumHighLatency; - return true; - } - Cost[RCId] += RCCost; - } else if (isOperandKill(MO, MRI)) { - // Is a virtual register use is a kill, hoisting it out of the loop - // may actually reduce register pressure or be register pressure - // neutral. - Cost[RCId] -= RCCost; + if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) { + DEBUG(dbgs() << "Hoist High Latency: " << MI); + ++NumHighLatency; + return true; } } + // Estimate register pressure to determine whether to LICM the instruction. + // In low register pressure situation, we can be more aggressive about + // hoisting. Also, favors hoisting long latency instructions even in + // moderately high pressure situation. + // Cheap instructions will only be hoisted if they don't increase register + // pressure at all. 
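The CanCauseHighRegPressure check invoked next walks the per-block pressure snapshots recorded from the loop header down to the current block. A simplified standalone version, where pressure-set IDs index directly into each snapshot and into the limit table:

    #include <map>
    #include <vector>

    // Hoisting is deemed too costly if, in any block on the path from the
    // loop header, adding a positive cost would reach that set's limit.
    bool causesHighPressure(const std::map<int, int> &Cost,
                            const std::vector<std::vector<int>> &BackTrace,
                            const std::vector<int> &Limit) {
      for (const auto &IdAndCost : Cost) {
        if (IdAndCost.second <= 0)
          continue;
        for (const auto &RP : BackTrace)
          if (RP[IdAndCost.first] + IdAndCost.second >= Limit[IdAndCost.first])
            return true;
      }
      return false;
    }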
+ auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false, + /*ConsiderUnseenAsDef=*/false); + // Visit BBs from header to current BB, if hoisting this doesn't cause // high register pressure, then it's safe to proceed. if (!CanCauseHighRegPressure(Cost, CheapInstr)) { diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp index 89054d4..ce6abdd 100644 --- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; // Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops. diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index baad411..d9da7bc 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -9,10 +9,12 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" @@ -116,7 +118,7 @@ MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) { BBCallbacks.back().setMap(this); Entry.Index = BBCallbacks.size()-1; Entry.Fn = BB->getParent(); - MCSymbol *Result = Context.CreateTempSymbol(); + MCSymbol *Result = Context.createTempSymbol(); Entry.Symbols = Result; return Result; } @@ -276,6 +278,7 @@ bool MachineModuleInfo::doInitialization(Module &M) { DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false; // Always emit some info, by default "no personality" info. Personalities.push_back(nullptr); + PersonalityTypeCache = EHPersonality::Unknown; AddrLabelSymbols = nullptr; TheModule = nullptr; @@ -398,7 +401,7 @@ void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad, /// addLandingPad - Provide the label of a try LandingPad block. /// MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { - MCSymbol *LandingPadLabel = Context.CreateTempSymbol(); + MCSymbol *LandingPadLabel = Context.createTempSymbol(); LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); LP.LandingPadLabel = LandingPadLabel; return LandingPadLabel; @@ -423,6 +426,12 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, Personalities.push_back(Personality); } +void MachineModuleInfo::addWinEHState(MachineBasicBlock *LandingPad, + int State) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.WinEHState = State; +} + /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. 
/// void MachineModuleInfo:: @@ -452,6 +461,25 @@ void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) { LP.TypeIds.push_back(0); } +void MachineModuleInfo::addSEHCatchHandler(MachineBasicBlock *LandingPad, + const Function *Filter, + const BlockAddress *RecoverBA) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + SEHHandler Handler; + Handler.FilterOrFinally = Filter; + Handler.RecoverBA = RecoverBA; + LP.SEHHandlers.push_back(Handler); +} + +void MachineModuleInfo::addSEHCleanupHandler(MachineBasicBlock *LandingPad, + const Function *Cleanup) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + SEHHandler Handler; + Handler.FilterOrFinally = Cleanup; + Handler.RecoverBA = nullptr; + LP.SEHHandlers.push_back(Handler); +} + /// TidyLandingPads - Remap landing pad labels and remove any deleted landing /// pads. void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { @@ -546,9 +574,18 @@ try_next:; /// getPersonality - Return the personality function for the current function. const Function *MachineModuleInfo::getPersonality() const { - // FIXME: Until PR1414 will be fixed, we're using 1 personality function per - // function - return !LandingPads.empty() ? LandingPads[0].Personality : nullptr; + for (const LandingPadInfo &LPI : LandingPads) + if (LPI.Personality) + return LPI.Personality; + return nullptr; +} + +EHPersonality MachineModuleInfo::getPersonalityType() { + if (PersonalityTypeCache == EHPersonality::Unknown) { + if (const Function *F = getPersonality()) + PersonalityTypeCache = classifyEHPersonality(F); + } + return PersonalityTypeCache; } /// getPersonalityIndex - Return unique index for current personality @@ -572,3 +609,18 @@ unsigned MachineModuleInfo::getPersonalityIndex() const { // in the zero index. return 0; } + +const Function *MachineModuleInfo::getWinEHParent(const Function *F) const { + StringRef WinEHParentName = + F->getFnAttribute("wineh-parent").getValueAsString(); + if (WinEHParentName.empty() || WinEHParentName == F->getName()) + return F; + return F->getParent()->getFunction(WinEHParentName); +} + +WinEHFuncInfo &MachineModuleInfo::getWinEHFuncInfo(const Function *F) { + auto &Ptr = FuncInfoMap[getWinEHParent(F)]; + if (!Ptr) + Ptr.reset(new WinEHFuncInfo); + return *Ptr; +} diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp index a1c7e9f..22d519e 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -31,15 +31,14 @@ static int SortSymbolPair(const void *LHS, const void *RHS) { return LHSS->getName().compare(RHSS->getName()); } -/// GetSortedStubs - Return the entries from a DenseMap in a deterministic -/// sorted orer. 
-MachineModuleInfoImpl::SymbolListTy -MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*, - MachineModuleInfoImpl::StubValueTy>&Map) { +MachineModuleInfoImpl::SymbolListTy MachineModuleInfoImpl::getSortedStubs( + DenseMap<MCSymbol *, MachineModuleInfoImpl::StubValueTy> &Map) { MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end()); if (!List.empty()) qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair); + + Map.clear(); return List; } diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 86bb34b..278a8f2 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -61,11 +61,11 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg, } bool -MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { - const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo(); +MachineRegisterInfo::recomputeRegClass(unsigned Reg) { + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); const TargetRegisterClass *OldRC = getRegClass(Reg); const TargetRegisterClass *NewRC = - getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC); + getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC, *MF); // Stop early if there is no room to grow. if (NewRC == OldRC) diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index 9fe23c5..44107d6 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -209,6 +209,11 @@ static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched); +static cl::opt<bool> EnableMachineSched( + "enable-misched", + cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), + cl::Hidden); + /// Forward declare the standard machine scheduler. This will be used as the /// default scheduler if the target does not set a default. static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C); @@ -304,6 +309,12 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() { /// design would be to split blocks at scheduling boundaries, but LLVM has a /// general bias against block splitting purely for implementation simplicity. bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { + if (EnableMachineSched.getNumOccurrences()) { + if (!EnableMachineSched) + return false; + } else if (!mf.getSubtarget().enableMachineScheduler()) + return false; + DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs())); // Initialize the context of the pass. @@ -336,9 +347,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { if (skipOptnoneFunction(*mf.getFunction())) return false; - const TargetSubtargetInfo &ST = - mf.getTarget().getSubtarget<TargetSubtargetInfo>(); - if (!ST.enablePostMachineScheduler()) { + if (!mf.getSubtarget().enablePostMachineScheduler()) { DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); return false; } @@ -934,8 +943,9 @@ updateScheduledPressure(const SUnit *SU, unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID); if (NewMaxPressure[ID] >= Limit - 2) { DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": " - << NewMaxPressure[ID] << " > " << Limit << "(+ " - << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); + << NewMaxPressure[ID] + << ((NewMaxPressure[ID] > Limit) ? 
" > " : " <= ") << Limit + << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); } } } @@ -1027,8 +1037,6 @@ void ScheduleDAGMILive::schedule() { scheduleMI(SU, IsTopNode); - updateQueues(SU, IsTopNode); - if (DFSResult) { unsigned SubtreeID = DFSResult->getSubtreeID(SU); if (!ScheduledTrees.test(SubtreeID)) { @@ -1040,6 +1048,8 @@ void ScheduleDAGMILive::schedule() { // Notify the scheduling strategy after updating the DAG. SchedImpl->schedNode(SU, IsTopNode); + + updateQueues(SU, IsTopNode); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); @@ -1434,12 +1444,15 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { // Check if either the dest or source is local. If it's live across a back // edge, it's not local. Note that if both vregs are live across the back // edge, we cannot successfully contrain the copy without cyclic scheduling. - unsigned LocalReg = DstReg; - unsigned GlobalReg = SrcReg; + // If both the copy's source and dest are local live intervals, then we + // should treat the dest as the global for the purpose of adding + // constraints. This adds edges from source's other uses to the copy. + unsigned LocalReg = SrcReg; + unsigned GlobalReg = DstReg; LiveInterval *LocalLI = &LIS->getInterval(LocalReg); if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) { - LocalReg = SrcReg; - GlobalReg = DstReg; + LocalReg = DstReg; + GlobalReg = SrcReg; LocalLI = &LIS->getInterval(LocalReg); if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) return; @@ -2599,8 +2612,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, TryCand, Cand, PhysRegCopy)) return; - // Avoid exceeding the target's limit. If signed PSetID is negative, it is - // invalid; convert it to INT_MAX to give it lowest priority. + // Avoid exceeding the target's limit. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, RegExcess)) diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index 8337793..5f03390 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -70,6 +71,8 @@ namespace { // will be split. SetVector<std::pair<MachineBasicBlock*,MachineBasicBlock*> > ToSplit; + SparseBitVector<> RegsToClearKillFlags; + public: static char ID; // Pass identification MachineSinking() : MachineFunctionPass(ID) { @@ -287,6 +290,12 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { if (!MadeChange) break; EverMadeChange = true; } + + // Now clear any kill flags for recorded registers. + for (auto I : RegsToClearKillFlags) + MRI->clearKillFlags(I); + RegsToClearKillFlags.clear(); + return EverMadeChange; } @@ -643,7 +652,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { return false; // Check if it's safe to move the instruction. 
- if (!MI->isSafeToMove(TII, AA, SawStore)) + if (!MI->isSafeToMove(AA, SawStore)) return false; // FIXME: This should include support for sinking instructions within the @@ -656,7 +665,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { bool BreakPHIEdge = false; MachineBasicBlock *ParentBlock = MI->getParent(); - MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge); + MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, + BreakPHIEdge); // If there are no outputs, it must have side-effects. if (!SuccToSinkTo) @@ -684,7 +694,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // other code paths. bool TryBreak = false; bool store = true; - if (!MI->isSafeToMove(TII, AA, store)) { + if (!MI->isSafeToMove(AA, store)) { DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n"); TryBreak = true; } @@ -755,7 +765,13 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. - MI->clearKillInfo(); + // Note that we have to clear the kill flags for any register this instruction + // uses as we may sink over another instruction which currently kills the + // used registers. + for (MachineOperand &MO : MI->operands()) { + if (MO.isReg() && MO.isUse()) + RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags. + } return true; } diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 2cf87eb..e07250b 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -52,12 +52,11 @@ void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { MF = &Func; - TII = MF->getSubtarget().getInstrInfo(); - TRI = MF->getSubtarget().getRegisterInfo(); + const TargetSubtargetInfo &ST = MF->getSubtarget(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); MRI = &MF->getRegInfo(); Loops = &getAnalysis<MachineLoopInfo>(); - const TargetSubtargetInfo &ST = - MF->getTarget().getSubtarget<TargetSubtargetInfo>(); SchedModel.init(ST.getSchedModel(), &ST, TII); BlockInfo.resize(MF->getNumBlockIDs()); ProcResourceCycles.resize(MF->getNumBlockIDs() * @@ -321,9 +320,7 @@ MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { unsigned CurCount = MTM.getResources(MBB)->InstrCount; const MachineBasicBlock *Best = nullptr; unsigned BestDepth = 0; - for (MachineBasicBlock::const_pred_iterator - I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { - const MachineBasicBlock *Pred = *I; + for (const MachineBasicBlock *Pred : MBB->predecessors()) { const MachineTraceMetrics::TraceBlockInfo *PredTBI = getDepthResources(Pred); // Ignore cycles that aren't natural loops. @@ -345,9 +342,7 @@ MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { const MachineLoop *CurLoop = getLoopFor(MBB); const MachineBasicBlock *Best = nullptr; unsigned BestHeight = 0; - for (MachineBasicBlock::const_succ_iterator - I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - const MachineBasicBlock *Succ = *I; + for (const MachineBasicBlock *Succ : MBB->successors()) { // Don't consider back-edges. 
if (CurLoop && Succ == CurLoop->getHeader()) continue; @@ -464,13 +459,11 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { // Run an upwards post-order search for the trace start. Bounds.Downward = false; Bounds.Visited.clear(); - typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO; - for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds); - I != E; ++I) { + for (auto I : inverse_post_order_ext(MBB, Bounds)) { DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the predecessors have been visited, pick the preferred one. - TBI.Pred = pickTracePred(*I); + TBI.Pred = pickTracePred(I); DEBUG({ if (TBI.Pred) dbgs() << "BB#" << TBI.Pred->getNumber() << '\n'; @@ -478,19 +471,17 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { dbgs() << "null\n"; }); // The trace leading to I is now known, compute the depth resources. - computeDepthResources(*I); + computeDepthResources(I); } // Run a downwards post-order search for the trace end. Bounds.Downward = true; Bounds.Visited.clear(); - typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO; - for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds); - I != E; ++I) { + for (auto I : post_order_ext(MBB, Bounds)) { DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the successors have been visited, pick the preferred one. - TBI.Succ = pickTraceSucc(*I); + TBI.Succ = pickTraceSucc(I); DEBUG({ if (TBI.Succ) dbgs() << "BB#" << TBI.Succ->getNumber() << '\n'; @@ -498,7 +489,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { dbgs() << "null\n"; }); // The trace leaving I is now known, compute the height resources. 
- computeHeightResources(*I); + computeHeightResources(I); } } diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index 364e8e2..f5edcb7 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -55,16 +55,13 @@ namespace { MachineVerifier(Pass *pass, const char *b) : PASS(pass), - Banner(b), - OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS")) + Banner(b) {} bool runOnMachineFunction(MachineFunction &MF); Pass *const PASS; const char *Banner; - const char *const OutFileName; - raw_ostream *OS; const MachineFunction *MF; const TargetMachine *TM; const TargetInstrInfo *TII; @@ -277,22 +274,6 @@ void MachineFunction::verify(Pass *p, const char *Banner) const { } bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { - raw_ostream *OutFile = nullptr; - if (OutFileName) { - std::error_code EC; - OutFile = new raw_fd_ostream(OutFileName, EC, - sys::fs::F_Append | sys::fs::F_Text); - if (EC) { - errs() << "Error opening '" << OutFileName << "': " << EC.message() - << '\n'; - exit(1); - } - - OS = OutFile; - } else { - OS = &errs(); - } - foundErrors = 0; this->MF = &MF; @@ -327,7 +308,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { if (MBBI->getParent() != MFI) { report("Bad instruction parent pointer", MFI); - *OS << "Instruction: " << *MBBI; + errs() << "Instruction: " << *MBBI; continue; } @@ -348,8 +329,18 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { } else if (!CurBundle) report("No bundle header", MBBI); visitMachineInstrBefore(MBBI); - for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) - visitMachineOperand(&MBBI->getOperand(I), I); + for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { + const MachineInstr &MI = *MBBI; + const MachineOperand &Op = MI.getOperand(I); + if (Op.getParent() != &MI) { + // Make sure to use correct addOperand / RemoveOperand / ChangeTo + // functions when replacing operands of a MachineInstr. + report("Instruction has operand with wrong parent set", &MI); + } + + visitMachineOperand(&Op, I); + } + visitMachineInstrAfter(MBBI); // Was this the last bundled instruction? @@ -363,9 +354,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { } visitMachineFunctionAfter(); - if (OutFile) - delete OutFile; - else if (foundErrors) + if (foundErrors) report_fatal_error("Found "+Twine(foundErrors)+" machine code errors."); // Clean up. 
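The operand check added in the hunk above enforces a back-pointer invariant: every MachineOperand must name its owning MachineInstr as its parent, which only holds when operands are modified through addOperand / RemoveOperand / ChangeTo*. As a minimal standalone sketch of that invariant — hypothetical stand-in types, not LLVM's real classes:

#include <cstdio>
#include <vector>

// Stand-ins for MachineInstr/MachineOperand: each operand records the
// instruction that owns it.
struct Instr;

struct Operand {
  Instr *Parent = nullptr;
};

struct Instr {
  std::vector<Operand> Ops;

  // The sanctioned way to add an operand: it fixes up the back-pointer,
  // as the real addOperand/ChangeTo* helpers do.
  void addOperand(Operand Op) {
    Op.Parent = this;
    Ops.push_back(Op);
  }
};

// Verifier-style walk: flag any operand whose parent pointer is stale.
static bool verifyOperandParents(const Instr &I) {
  for (const Operand &Op : I.Ops)
    if (Op.Parent != &I)
      return false;
  return true;
}

int main() {
  Instr A, B;
  A.addOperand(Operand());
  B.Ops.push_back(A.Ops[0]); // bypasses addOperand: B keeps a stale parent
  std::printf("A ok: %d  B ok: %d\n", (int)verifyOperandParents(A),
              (int)verifyOperandParents(B));
  return 0;
}

Because the verifier runs this per operand, a stale pointer is reported against the offending instruction instead of surfacing as a crash much later.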
@@ -382,76 +371,76 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { void MachineVerifier::report(const char *msg, const MachineFunction *MF) { assert(MF); - *OS << '\n'; + errs() << '\n'; if (!foundErrors++) { if (Banner) - *OS << "# " << Banner << '\n'; - MF->print(*OS, Indexes); + errs() << "# " << Banner << '\n'; + MF->print(errs(), Indexes); } - *OS << "*** Bad machine code: " << msg << " ***\n" + errs() << "*** Bad machine code: " << msg << " ***\n" << "- function: " << MF->getName() << "\n"; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - *OS << "- basic block: BB#" << MBB->getNumber() + errs() << "- basic block: BB#" << MBB->getNumber() << ' ' << MBB->getName() << " (" << (const void*)MBB << ')'; if (Indexes) - *OS << " [" << Indexes->getMBBStartIdx(MBB) + errs() << " [" << Indexes->getMBBStartIdx(MBB) << ';' << Indexes->getMBBEndIdx(MBB) << ')'; - *OS << '\n'; + errs() << '\n'; } void MachineVerifier::report(const char *msg, const MachineInstr *MI) { assert(MI); report(msg, MI->getParent()); - *OS << "- instruction: "; + errs() << "- instruction: "; if (Indexes && Indexes->hasIndex(MI)) - *OS << Indexes->getInstructionIndex(MI) << '\t'; - MI->print(*OS, TM); + errs() << Indexes->getInstructionIndex(MI) << '\t'; + MI->print(errs(), TM); } void MachineVerifier::report(const char *msg, const MachineOperand *MO, unsigned MONum) { assert(MO); report(msg, MO->getParent()); - *OS << "- operand " << MONum << ": "; - MO->print(*OS, TM); - *OS << "\n"; + errs() << "- operand " << MONum << ": "; + MO->print(errs(), TRI); + errs() << "\n"; } void MachineVerifier::report(const char *msg, const MachineFunction *MF, const LiveInterval &LI) { report(msg, MF); - *OS << "- interval: " << LI << '\n'; + errs() << "- interval: " << LI << '\n'; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI) { report(msg, MBB); - *OS << "- interval: " << LI << '\n'; + errs() << "- interval: " << LI << '\n'; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, const LiveRange &LR, unsigned Reg, unsigned LaneMask) { report(msg, MBB); - *OS << "- liverange: " << LR << '\n'; - *OS << "- register: " << PrintReg(Reg, TRI) << '\n'; + errs() << "- liverange: " << LR << '\n'; + errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; if (LaneMask != 0) - *OS << "- lanemask: " << format("%04X\n", LaneMask); + errs() << "- lanemask: " << format("%04X\n", LaneMask); } void MachineVerifier::report(const char *msg, const MachineFunction *MF, const LiveRange &LR, unsigned Reg, unsigned LaneMask) { report(msg, MF); - *OS << "- liverange: " << LR << '\n'; - *OS << "- register: " << PrintReg(Reg, TRI) << '\n'; + errs() << "- liverange: " << LR << '\n'; + errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; if (LaneMask != 0) - *OS << "- lanemask: " << format("%04X\n", LaneMask); + errs() << "- lanemask: " << format("%04X\n", LaneMask); } void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { @@ -539,7 +528,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has successor that isn't part of the function.", MBB); if (!MBBInfoMap[*I].Preds.count(MBB)) { report("Inconsistent CFG", MBB); - *OS << "MBB is not in the predecessor list of the successor BB#" + errs() << "MBB is not in the predecessor list of the successor BB#" << (*I)->getNumber() << ".\n"; } } @@ -551,7 +540,7 @@ 
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has predecessor that isn't part of the function.", MBB); if (!MBBInfoMap[*I].Succs.count(MBB)) { report("Inconsistent CFG", MBB); - *OS << "MBB is not in the successor list of the predecessor BB#" + errs() << "MBB is not in the successor list of the predecessor BB#" << (*I)->getNumber() << ".\n"; } } @@ -726,7 +715,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { SlotIndex idx = Indexes->getInstructionIndex(MI); if (!(idx > lastIndex)) { report("Instruction index out of order", MI); - *OS << "Last instruction was at " << lastIndex << '\n'; + errs() << "Last instruction was at " << lastIndex << '\n'; } lastIndex = idx; } @@ -739,7 +728,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { FirstTerminator = MI; } else if (FirstTerminator) { report("Non-terminator instruction after the first terminator", MI); - *OS << "First terminator was:\t" << *FirstTerminator; + errs() << "First terminator was:\t" << *FirstTerminator; } } @@ -760,7 +749,7 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) { if (!isUInt<5>(MI->getOperand(1).getImm())) report("Unknown asm flags", &MI->getOperand(1), 1); - assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed"); + static_assert(InlineAsm::MIOp_FirstOperand == 2, "Asm format changed"); unsigned OpNo = InlineAsm::MIOp_FirstOperand; unsigned NumOps; @@ -791,7 +780,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { const MCInstrDesc &MCID = MI->getDesc(); if (MI->getNumOperands() < MCID.getNumOperands()) { report("Too few operands", MI); - *OS << MCID.getNumOperands() << " operands expected, but " + errs() << MCID.getNumOperands() << " operands expected, but " << MI->getNumOperands() << " given.\n"; } @@ -921,7 +910,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TII->getRegClass(MCID, MONum, TRI, *MF)) { if (!DRC->contains(Reg)) { report("Illegal physical register for instruction", MO, MONum); - *OS << TRI->getName(Reg) << " is not a " + errs() << TRI->getName(Reg) << " is not a " << TRI->getRegClassName(DRC) << " register.\n"; } } @@ -933,13 +922,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TRI->getSubClassWithSubReg(RC, SubIdx); if (!SRC) { report("Invalid subregister index for virtual register", MO, MONum); - *OS << "Register class " << TRI->getRegClassName(RC) + errs() << "Register class " << TRI->getRegClassName(RC) << " does not support subreg index " << SubIdx << "\n"; return; } if (RC != SRC) { report("Invalid register class for subregister index", MO, MONum); - *OS << "Register class " << TRI->getRegClassName(RC) + errs() << "Register class " << TRI->getRegClassName(RC) << " does not fully support subreg index " << SubIdx << "\n"; return; } @@ -948,7 +937,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TII->getRegClass(MCID, MONum, TRI, *MF)) { if (SubIdx) { const TargetRegisterClass *SuperRC = - TRI->getLargestLegalSuperClass(RC); + TRI->getLargestLegalSuperClass(RC, *MF); if (!SuperRC) { report("No largest legal super class exists.", MO, MONum); return; @@ -961,7 +950,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } if (!RC->hasSuperClassEq(DRC)) { report("Illegal virtual register for instruction", MO, MONum); - *OS << "Expected a " << TRI->getRegClassName(DRC) + errs() << "Expected a " << 
TRI->getRegClassName(DRC) << " register, but got a " << TRI->getRegClassName(RC) << " register\n"; } @@ -987,11 +976,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { SlotIndex Idx = LiveInts->getInstructionIndex(MI); if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) { report("Instruction loads from dead spill slot", MO, MONum); - *OS << "Live stack: " << LI << '\n'; + errs() << "Live stack: " << LI << '\n'; } if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) { report("Instruction stores to dead spill slot", MO, MONum); - *OS << "Live stack: " << LI << '\n'; + errs() << "Live stack: " << LI << '\n'; } } break; @@ -1030,12 +1019,12 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { LiveQueryResult LRQ = LR->Query(UseIdx); if (!LRQ.valueIn()) { report("No live segment at use", MO, MONum); - *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) + errs() << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n'; } if (MO->isKill() && !LRQ.isKill()) { report("Live range continues after kill flag", MO, MONum); - *OS << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n'; + errs() << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n'; } } } @@ -1048,13 +1037,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { LiveQueryResult LRQ = LI.Query(UseIdx); if (!LRQ.valueIn()) { report("No live segment at use", MO, MONum); - *OS << UseIdx << " is not live in " << LI << '\n'; + errs() << UseIdx << " is not live in " << LI << '\n'; } // Check for extra kill flags. // Note that we allow missing kill flags for now. if (MO->isKill() && !LRQ.isKill()) { report("Live range continues after kill flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; + errs() << "Live range: " << LI << '\n'; } } else { report("Virtual register has no live interval", MO, MONum); @@ -1077,6 +1066,25 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } } } + // If there is an additional implicit-use of a super register we stop + // here. By definition we are fine if the super register is not + // (completely) dead, if the complete super register is dead we will + // get a report for its operand. + if (Bad) { + for (const MachineOperand &MOP : MI->uses()) { + if (!MOP.isReg()) + continue; + if (!MOP.isImplicit()) + continue; + for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid(); + ++SubRegs) { + if (*SubRegs == Reg) { + Bad = false; + break; + } + } + } + } if (Bad) report("Using an undefined physical register", MO, MONum); } else if (MRI->def_empty(Reg)) { @@ -1118,19 +1126,19 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { assert(VNI && "NULL valno is not allowed"); if (VNI->def != DefIdx) { report("Inconsistent valno->def", MO, MONum); - *OS << "Valno " << VNI->id << " is not defined at " + errs() << "Valno " << VNI->id << " is not defined at " << DefIdx << " in " << LI << '\n'; } } else { report("No live segment at def", MO, MONum); - *OS << DefIdx << " is not live in " << LI << '\n'; + errs() << DefIdx << " is not live in " << LI << '\n'; } // Check that, if the dead def flag is present, LiveInts agree. 
if (MO->isDead()) { LiveQueryResult LRQ = LI.Query(DefIdx); if (!LRQ.isDeadDef()) { report("Live range continues after dead def flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; + errs() << "Live range: " << LI << '\n'; } } } else { @@ -1172,7 +1180,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { SlotIndex stop = Indexes->getMBBEndIdx(MBB); if (!(stop > lastIndex)) { report("Block ends before last instruction index", MBB); - *OS << "Block ends at " << stop + errs() << "Block ends at " << stop << " last instruction was at " << lastIndex << '\n'; } lastIndex = stop; @@ -1274,7 +1282,7 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { PrE = MBB->pred_end(); PrI != PrE; ++PrI) { if (!seen.count(*PrI)) { report("Missing PHI operand", &BBI); - *OS << "BB#" << (*PrI)->getNumber() + errs() << "BB#" << (*PrI)->getNumber() << " is a predecessor according to the CFG.\n"; } } @@ -1305,7 +1313,7 @@ void MachineVerifier::visitMachineFunctionAfter() { ++I) if (MInfo.regsKilled.count(*I)) { report("Virtual register killed in block, but needed live out.", &MBB); - *OS << "Virtual register " << PrintReg(*I) + errs() << "Virtual register " << PrintReg(*I) << " is used after the block.\n"; } } @@ -1337,13 +1345,13 @@ void MachineVerifier::verifyLiveVariables() { if (MInfo.vregsRequired.count(Reg)) { if (!VI.AliveBlocks.test(MBB.getNumber())) { report("LiveVariables: Block missing from AliveBlocks", &MBB); - *OS << "Virtual register " << PrintReg(Reg) + errs() << "Virtual register " << PrintReg(Reg) << " must be live through the block.\n"; } } else { if (VI.AliveBlocks.test(MBB.getNumber())) { report("LiveVariables: Block should not be in AliveBlocks", &MBB); - *OS << "Virtual register " << PrintReg(Reg) + errs() << "Virtual register " << PrintReg(Reg) << " is not needed live through the block.\n"; } } @@ -1362,7 +1370,7 @@ void MachineVerifier::verifyLiveIntervals() { if (!LiveInts->hasInterval(Reg)) { report("Missing live interval for virtual register", MF); - *OS << PrintReg(Reg, TRI) << " still has defs or uses\n"; + errs() << PrintReg(Reg, TRI) << " still has defs or uses\n"; continue; } @@ -1388,13 +1396,13 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, if (!DefVNI) { report("Valno not live at def and not marked unused", MF, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << '\n'; + errs() << "Valno #" << VNI->id << '\n'; return; } if (DefVNI != VNI) { report("Live segment at def has different valno", MF, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << " where valno #" << DefVNI->id << " is live\n"; return; } @@ -1402,7 +1410,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); if (!MBB) { report("Invalid definition index", MF, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LR << '\n'; return; } @@ -1411,7 +1419,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { report("PHIDef value is not defined at MBB start", MBB, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; } return; @@ -1421,7 +1429,7 @@ 
void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); if (!MI) { report("No instruction at def index", MBB, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; return; } @@ -1449,7 +1457,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, if (!hasDef) { report("Defining instruction does not modify register", MI); - *OS << "Valno #" << VNI->id << " in " << LR << '\n'; + errs() << "Valno #" << VNI->id << " in " << LR << '\n'; } // Early clobber defs begin at USE slots, but other defs must begin at @@ -1458,12 +1466,12 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, if (!VNI->def.isEarlyClobber()) { report("Early clobber def must be at an early-clobber slot", MBB, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } } else if (!VNI->def.isRegister()) { report("Non-PHI, non-early clobber def must be at a register slot", MBB, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } } } @@ -1477,32 +1485,32 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) { report("Foreign valno in live segment", MF, LR, Reg, LaneMask); - *OS << S << " has a bad valno\n"; + errs() << S << " has a bad valno\n"; } if (VNI->isUnused()) { report("Live segment valno is marked unused", MF, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; } const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start); if (!MBB) { report("Bad start of live segment, no basic block", MF, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; return; } SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); if (S.start != MBBStartIdx && S.start != VNI->def) { report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; } const MachineBasicBlock *EndMBB = LiveInts->getMBBFromIndex(S.end.getPrevSlot()); if (!EndMBB) { report("Bad end of live segment, no basic block", MF, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; return; } @@ -1521,7 +1529,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!MI) { report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; return; } @@ -1529,7 +1537,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (S.end.isBlock()) { report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; } if (S.end.isDead()) { @@ -1538,7 +1546,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!SlotIndex::isSameInstr(S.start, S.end)) { report("Live segment ending at dead slot spans instructions", EndMBB, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; } } @@ -1549,7 +1557,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, report("Live segment ending at early clobber slot must be " "redefined by an EC def in the same instruction", EndMBB, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; } } @@ -1575,10 +1583,11 @@ void 
MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!hasRead) { // When tracking subregister liveness, the main range must start new // values on partial register writes, even if there is no read. - if (!MRI->tracksSubRegLiveness() || LaneMask != 0 || !hasSubRegDef) { + if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask != 0 || + !hasSubRegDef) { report("Instruction ending live segment doesn't read the register", MI); - *OS << S << " in " << LR << '\n'; + errs() << S << " in " << LR << '\n'; } } } @@ -1619,7 +1628,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!PVNI) { report("Register not marked live out of predecessor", *PI, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() + errs() << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " << PEnd << '\n'; continue; @@ -1629,7 +1638,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!IsPHI && PVNI != VNI) { report("Different value live out of predecessor", *PI, LR, Reg, LaneMask); - *OS << "Valno #" << PVNI->id << " live out of BB#" + errs() << "Valno #" << PVNI->id << " live out of BB#" << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI) << '\n'; @@ -1651,40 +1660,35 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, } void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { - verifyLiveRange(LI, LI.reg); - unsigned Reg = LI.reg; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - unsigned Mask = 0; - unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); - for (const LiveInterval::SubRange &SR : LI.subranges()) { - if ((Mask & SR.LaneMask) != 0) - report("Lane masks of sub ranges overlap in live interval", MF, LI); - if ((SR.LaneMask & ~MaxMask) != 0) - report("Subrange lanemask is invalid", MF, LI); - Mask |= SR.LaneMask; - verifyLiveRange(SR, LI.reg, SR.LaneMask); - if (!LI.covers(SR)) - report("A Subrange is not covered by the main range", MF, LI); - } - } else if (LI.hasSubRanges()) { - report("subregister liveness only allowed for virtual registers", MF, LI); + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + verifyLiveRange(LI, Reg); + + unsigned Mask = 0; + unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); + for (const LiveInterval::SubRange &SR : LI.subranges()) { + if ((Mask & SR.LaneMask) != 0) + report("Lane masks of sub ranges overlap in live interval", MF, LI); + if ((SR.LaneMask & ~MaxMask) != 0) + report("Subrange lanemask is invalid", MF, LI); + Mask |= SR.LaneMask; + verifyLiveRange(SR, LI.reg, SR.LaneMask); + if (!LI.covers(SR)) + report("A Subrange is not covered by the main range", MF, LI); } // Check the LI only has one connected component. 
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - ConnectedVNInfoEqClasses ConEQ(*LiveInts); - unsigned NumComp = ConEQ.Classify(&LI); - if (NumComp > 1) { - report("Multiple connected components in live interval", MF, LI); - for (unsigned comp = 0; comp != NumComp; ++comp) { - *OS << comp << ": valnos"; - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), - E = LI.vni_end(); I!=E; ++I) - if (comp == ConEQ.getEqClass(*I)) - *OS << ' ' << (*I)->id; - *OS << '\n'; - } + ConnectedVNInfoEqClasses ConEQ(*LiveInts); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp > 1) { + report("Multiple connected components in live interval", MF, LI); + for (unsigned comp = 0; comp != NumComp; ++comp) { + errs() << comp << ": valnos"; + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), + E = LI.vni_end(); I!=E; ++I) + if (comp == ConEQ.getEqClass(*I)) + errs() << ' ' << (*I)->id; + errs() << '\n'; } } } @@ -1712,8 +1716,8 @@ namespace { /// by a FrameDestroy <n>, stack adjustments are identical on all /// CFG edges to a merge point, and frame is destroyed at end of a return block. void MachineVerifier::verifyStackFrame() { - int FrameSetupOpcode = TII->getCallFrameSetupOpcode(); - int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); + unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); SmallVector<StackStateOfBB, 8> SPState; SPState.resize(MF->getNumBlockIDs()); @@ -1764,7 +1768,7 @@ void MachineVerifier::verifyStackFrame() { BBState.ExitValue; if (BBState.ExitIsSetup && AbsSPAdj != Size) { report("FrameDestroy <n> is after FrameSetup <m>", &I); - *OS << "FrameDestroy <" << Size << "> is after FrameSetup <" + errs() << "FrameDestroy <" << Size << "> is after FrameSetup <" << AbsSPAdj << ">.\n"; } BBState.ExitValue += Size; @@ -1781,7 +1785,7 @@ void MachineVerifier::verifyStackFrame() { (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue || SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { report("The exit stack state of a predecessor is inconsistent.", MBB); - *OS << "Predecessor BB#" << (*I)->getNumber() << " has exit state (" + errs() << "Predecessor BB#" << (*I)->getNumber() << " has exit state (" << SPState[(*I)->getNumber()].ExitValue << ", " << SPState[(*I)->getNumber()].ExitIsSetup << "), while BB#" << MBB->getNumber() << " has entry state (" @@ -1797,7 +1801,7 @@ void MachineVerifier::verifyStackFrame() { (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue || SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { report("The entry stack state of a successor is inconsistent.", MBB); - *OS << "Successor BB#" << (*I)->getNumber() << " has entry state (" + errs() << "Successor BB#" << (*I)->getNumber() << " has entry state (" << SPState[(*I)->getNumber()].EntryValue << ", " << SPState[(*I)->getNumber()].EntryIsSetup << "), while BB#" << MBB->getNumber() << " has exit state (" diff --git a/contrib/llvm/lib/CodeGen/OcamlGC.cpp b/contrib/llvm/lib/CodeGen/OcamlGC.cpp index 48db200..17654a6 100644 --- a/contrib/llvm/lib/CodeGen/OcamlGC.cpp +++ b/contrib/llvm/lib/CodeGen/OcamlGC.cpp @@ -20,16 +20,15 @@ using namespace llvm; namespace { - class OcamlGC : public GCStrategy { - public: - OcamlGC(); - }; +class OcamlGC : public GCStrategy { +public: + OcamlGC(); +}; } -static GCRegistry::Add<OcamlGC> -X("ocaml", "ocaml 3.10-compatible GC"); +static GCRegistry::Add<OcamlGC> X("ocaml", "ocaml 3.10-compatible GC"); -void llvm::linkOcamlGC() { } +void 
llvm::linkOcamlGC() {} OcamlGC::OcamlGC() { NeededSafePoints = 1 << GC::PostCall; diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp index def2e3d..d514190 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> @@ -46,6 +47,10 @@ SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false), cl::Hidden, cl::desc("Split all critical edges during " "PHI elimination")); +static cl::opt<bool> NoPhiElimLiveOutEarlyExit( + "no-phi-elim-live-out-early-exit", cl::init(false), cl::Hidden, + cl::desc("Do not use an early exit if isLiveOutPastPHIs returns true.")); + namespace { class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information @@ -573,12 +578,14 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // there is a risk it may not be coalesced away. // // If the copy would be a kill, there is no need to split the edge. - if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges) + bool ShouldSplit = isLiveOutPastPHIs(Reg, PreMBB); + if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit) continue; - - DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" - << PreMBB->getNumber() << " -> BB#" << MBB.getNumber() - << ": " << *BBI); + if (ShouldSplit) { + DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" + << PreMBB->getNumber() << " -> BB#" << MBB.getNumber() + << ": " << *BBI); + } // If Reg is not live-in to MBB, it means it must be live-in to some // other PreMBB successor, and we can avoid the interference by splitting @@ -588,7 +595,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // is likely to be left after coalescing. If we are looking at a loop // exiting edge, split it so we won't insert code in the loop, otherwise // don't bother. - bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges; + ShouldSplit = ShouldSplit && !isLiveIn(Reg, &MBB); // Check for a loop exiting edge. if (!ShouldSplit && CurLoop != PreLoop) { @@ -603,7 +610,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // Split unless this edge is entering CurLoop from an outer loop. 
ShouldSplit = PreLoop && !PreLoop->contains(CurLoop); } - if (!ShouldSplit) + if (!ShouldSplit && !SplitAllCriticalEdges) continue; if (!PreMBB->SplitCriticalEdge(&MBB, this)) { DEBUG(dbgs() << "Failed to split critical edge.\n"); diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp index e53e874..6902243 100644 --- a/contrib/llvm/lib/CodeGen/Passes.cpp +++ b/contrib/llvm/lib/CodeGen/Passes.cpp @@ -14,18 +14,16 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Analysis/Passes.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" @@ -54,11 +52,11 @@ static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, cl::desc("Disable Machine Common Subexpression Elimination")); static cl::opt<cl::boolOrDefault> -OptimizeRegAlloc("optimize-regalloc", cl::Hidden, + EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, + cl::desc("enable the shrink-wrapping pass")); +static cl::opt<cl::boolOrDefault> OptimizeRegAlloc( + "optimize-regalloc", cl::Hidden, cl::desc("Enable optimized register allocation compilation path.")); -static cl::opt<cl::boolOrDefault> -EnableMachineSched("enable-misched", - cl::desc("Enable the machine instruction scheduling pass.")); static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); @@ -82,7 +80,9 @@ static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, cl::desc("Dump garbage collector data")); static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), - cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=nullptr)); + cl::init(false), + cl::ZeroOrMore); + static cl::opt<std::string> PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), @@ -115,28 +115,6 @@ static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID, return PassID; } -/// Allow Pass selection to be overriden by command line options. This supports -/// flags with ternary conditions. TargetID is passed through by default. The -/// pass is suppressed when the option is false. When the option is true, the -/// StandardID is selected if the target provides no default. -static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID, - cl::boolOrDefault Override, - AnalysisID StandardID) { - switch (Override) { - case cl::BOU_UNSET: - return TargetID; - case cl::BOU_TRUE: - if (TargetID.isValid()) - return TargetID; - if (StandardID == nullptr) - report_fatal_error("Target cannot enable pass"); - return StandardID; - case cl::BOU_FALSE: - return IdentifyingPassPtr(); - } - llvm_unreachable("Invalid command line option state"); -} - /// Allow standard passes to be disabled by the command line, regardless of who /// is adding the pass. 
/// @@ -181,9 +159,6 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID, if (StandardID == &MachineCSEID) return applyDisable(TargetID, DisableMachineCSE); - if (StandardID == &MachineSchedulerID) - return applyOverride(TargetID, EnableMachineSched, StandardID); - if (StandardID == &TargetPassConfig::PostRAMachineLICMID) return applyDisable(TargetID, DisablePostRAMachineLICM); @@ -234,10 +209,10 @@ TargetPassConfig::~TargetPassConfig() { // Out of line constructor provides default values for pass options and // registers all common codegen passes. TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) - : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr), - Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), - Impl(nullptr), Initialized(false), DisableVerify(false), - EnableTailMerge(true) { + : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr), + Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), + Impl(nullptr), Initialized(false), DisableVerify(false), + EnableTailMerge(true), EnableShrinkWrap(false) { Impl = new PassConfigImpl(); @@ -248,11 +223,6 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) // Substitute Pseudo Pass IDs for real ones. substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); substitutePass(&PostRAMachineLICMID, &MachineLICMID); - - // Temporarily disable experimental passes. - const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>(); - if (!ST.useMachineScheduler()) - disablePass(&MachineSchedulerID); } /// Insert InsertedPassID pass after TargetPassID. @@ -408,10 +378,8 @@ void TargetPassConfig::addIRPasses() { // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. - if (!DisableVerify) { + if (!DisableVerify) addPass(createVerifierPass()); - addPass(createDebugInfoVerifierPass()); - } // Run loop strength reduction before anything else. if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { @@ -420,7 +388,10 @@ void TargetPassConfig::addIRPasses() { addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n")); } + // Run GC lowering passes for builtin collectors + // TODO: add a pass insertion point here addPass(createGCLoweringPass()); + addPass(createShadowStackGCLoweringPass()); // Make sure that no unreachable blocks are instruction selected. addPass(createUnreachableBlockEliminationPass()); @@ -448,10 +419,15 @@ void TargetPassConfig::addPassesToHandleExceptions() { // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: - case ExceptionHandling::ItaniumWinEH: addPass(createDwarfEHPass(TM)); break; - case ExceptionHandling::MSVC: // FIXME: Add preparation. + case ExceptionHandling::WinEH: + // We support using both GCC-style and MSVC-style exceptions on Windows, so + // add both preparation passes. Each pass will only actually run if it + // recognizes the personality function. + addPass(createWinEHPass(TM)); + addPass(createDwarfEHPass(TM)); + break; case ExceptionHandling::None: addPass(createLowerInvokePass()); @@ -474,12 +450,6 @@ void TargetPassConfig::addCodeGenPrepare() { void TargetPassConfig::addISelPrepare() { addPreISel(); - // Need to verify DebugInfo *before* creating the stack protector analysis. - // It's a function pass, and verifying between it and its users causes a - // crash. 
- if (!DisableVerify) - addPass(createDebugInfoVerifierPass()); - addPass(createStackProtectorPass(TM)); if (PrintISelInput) @@ -557,6 +527,8 @@ void TargetPassConfig::addMachinePasses() { addPostRegAlloc(); // Insert prolog/epilog code. Eliminate abstract frame index references... + if (getEnableShrinkWrap()) + addPass(&ShrinkWrapID); addPass(&PrologEpilogCodeInserterID); /// Add passes that optimize machine instructions after register allocation. @@ -632,6 +604,21 @@ void TargetPassConfig::addMachineSSAOptimization() { addPass(&DeadMachineInstructionElimID); } +bool TargetPassConfig::getEnableShrinkWrap() const { + switch (EnableShrinkWrapOpt) { + case cl::BOU_UNSET: + return EnableShrinkWrap && getOptLevel() != CodeGenOpt::None; + // If EnableShrinkWrap is set, it takes precedence over whatever the + // target sets. The rationale is that we assume we want to test + // something related to shrink-wrapping. + case cl::BOU_TRUE: + return true; + case cl::BOU_FALSE: + return false; + } + llvm_unreachable("Invalid shrink-wrapping state"); +} + //===---------------------------------------------------------------------===// /// Register Allocation Pass Configuration //===---------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 283d1f2..ebe05e3 100644 --- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -76,6 +76,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -411,8 +412,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, if (ExtendLife && !ExtendedUses.empty()) // Extend the liveness of the extension result. - std::copy(ExtendedUses.begin(), ExtendedUses.end(), - std::back_inserter(Uses)); + Uses.append(ExtendedUses.begin(), ExtendedUses.end()); // Now replace all uses. bool Changed = false; @@ -916,7 +916,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { // => v0 = COPY v1 // Currently we haven't seen a motivating example for that and we // want to avoid untested code. - NumRewrittenCopies += Changed == true; + NumRewrittenCopies += Changed; return Changed; } diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp index 89e1d11..55f08e4 100644 --- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -141,7 +141,7 @@ namespace { TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs); - ~SchedulePostRATDList(); + ~SchedulePostRATDList() override; /// startBlock - Initialize register live-range state for scheduling in /// this block. @@ -282,9 +282,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { } else { // Check that post-RA scheduling is enabled for this target. // This may upgrade the AntiDepMode.
- const TargetSubtargetInfo &ST = - Fn.getTarget().getSubtarget<TargetSubtargetInfo>(); - if (!enablePostRAScheduler(ST, PassConfig->getOptLevel(), + if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(), AntiDepMode, CriticalPathRCs)) return false; } diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 61407fa..76583f0 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "PrologEpilogInserter.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -28,8 +27,10 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" @@ -48,6 +49,53 @@ using namespace llvm; #define DEBUG_TYPE "pei" +namespace { +class PEI : public MachineFunctionPass { +public: + static char ID; + PEI() : MachineFunctionPass(ID) { + initializePEIPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract + /// frame indexes with appropriate references. + /// + bool runOnMachineFunction(MachineFunction &Fn) override; + +private: + RegScavenger *RS; + + // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved + // stack frame indexes. + unsigned MinCSFrameIndex, MaxCSFrameIndex; + + // Save and Restore blocks of the current function. + MachineBasicBlock *SaveBlock; + SmallVector<MachineBasicBlock *, 4> RestoreBlocks; + + // Flag to control whether to use the register scavenger to resolve + // frame index materialization registers. Set according to + // TRI->requiresFrameIndexScavenging() for the current function. + bool FrameIndexVirtualScavenging; + + void calculateSets(MachineFunction &Fn); + void calculateCallsInformation(MachineFunction &Fn); + void calculateCalleeSavedRegisters(MachineFunction &Fn); + void insertCSRSpillsAndRestores(MachineFunction &Fn); + void calculateFrameObjectOffsets(MachineFunction &Fn); + void replaceFrameIndices(MachineFunction &Fn); + void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj); + void scavengeFrameVirtualRegs(MachineFunction &Fn); + void insertPrologEpilogCode(MachineFunction &Fn); + + // Convenience for recognizing return blocks. + bool isReturnBlock(MachineBasicBlock *MBB); +}; +} // namespace + char PEI::ID = 0; char &llvm::PrologEpilogCodeInserterID = PEI::ID; @@ -85,20 +133,26 @@ bool PEI::isReturnBlock(MachineBasicBlock* MBB) { /// Compute the set of return blocks void PEI::calculateSets(MachineFunction &Fn) { - // Sets used to compute spill, restore placement sets. - const std::vector<CalleeSavedInfo> &CSI = - Fn.getFrameInfo()->getCalleeSavedInfo(); + const MachineFrameInfo *MFI = Fn.getFrameInfo(); - // If no CSRs used, we are done. - if (CSI.empty()) + // Even when we do not change any CSR, we still want to insert the + // prologue and epilogue of the function. + // So set the save points for those. + + // Use the points found by shrink-wrapping, if any. 
+ if (MFI->getSavePoint()) { + SaveBlock = MFI->getSavePoint(); + assert(MFI->getRestorePoint() && "Both restore and save must be set"); + RestoreBlocks.push_back(MFI->getRestorePoint()); return; + } // Save refs to entry and return blocks. - EntryBlock = Fn.begin(); + SaveBlock = Fn.begin(); for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); MBB != E; ++MBB) if (isReturnBlock(MBB)) - ReturnBlocks.push_back(MBB); + RestoreBlocks.push_back(MBB); return; } @@ -178,7 +232,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { } delete RS; - ReturnBlocks.clear(); + RestoreBlocks.clear(); return true; } @@ -194,12 +248,12 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { bool AdjustsStack = MFI->adjustsStack(); // Get the function call frame set-up and tear-down instruction opcode - int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); - int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); // Early exit for targets which have no call frame setup/destroy pseudo // instructions. - if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) + if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u) return; std::vector<MachineBasicBlock::iterator> FrameSDOps; @@ -324,6 +378,61 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { MFI->setCalleeSavedInfo(CSI); } +/// Helper function to update the liveness information for the callee-saved +/// registers. +static void updateLiveness(MachineFunction &MF) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + // Visited will contain all the basic blocks that are in the region + // where the callee saved registers are alive: + // - Anything that is not Save or Restore -> LiveThrough. + // - Save -> LiveIn. + // - Restore -> LiveOut. + // The live-out is not attached to the block, so no need to keep + // Restore in this set. + SmallPtrSet<MachineBasicBlock *, 8> Visited; + SmallVector<MachineBasicBlock *, 8> WorkList; + MachineBasicBlock *Entry = &MF.front(); + MachineBasicBlock *Save = MFI->getSavePoint(); + + if (!Save) + Save = Entry; + + if (Entry != Save) { + WorkList.push_back(Entry); + Visited.insert(Entry); + } + Visited.insert(Save); + + MachineBasicBlock *Restore = MFI->getRestorePoint(); + if (Restore) + // By construction Restore cannot be visited, otherwise it + // means there exists a path to Restore that does not go + // through Save. + WorkList.push_back(Restore); + + while (!WorkList.empty()) { + const MachineBasicBlock *CurBB = WorkList.pop_back_val(); + // By construction, the region that is after the save point is + // dominated by the Save and post-dominated by the Restore. + if (CurBB == Save) + continue; + // Enqueue all the successors not already visited. + // Those are by construction either before Save or after Restore. + for (MachineBasicBlock *SuccBB : CurBB->successors()) + if (Visited.insert(SuccBB).second) + WorkList.push_back(SuccBB); + } + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + for (MachineBasicBlock *MBB : Visited) + // Add the callee-saved register as live-in. + // It's killed at the spill. + MBB->addLiveIn(CSI[i].getReg()); + } +} + /// insertCSRSpillsAndRestores - Insert spill and restore code for /// callee saved registers used in the function. 
/// @@ -344,26 +453,22 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { MachineBasicBlock::iterator I; // Spill using target interface. - I = EntryBlock->begin(); - if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { + I = SaveBlock->begin(); + if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - // Add the callee-saved register as live-in. - // It's killed at the spill. - EntryBlock->addLiveIn(CSI[i].getReg()); - // Insert the spill to the stack frame. unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(), + TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(), RC, TRI); } } + // Update the live-in information of all the blocks up to the save point. + updateLiveness(Fn); // Restore using target interface. - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { - MachineBasicBlock *MBB = ReturnBlocks[ri]; + for (MachineBasicBlock *MBB : RestoreBlocks) { I = MBB->end(); - --I; // Skip over all terminator instructions, which are part of the return // sequence. @@ -495,7 +600,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; + Offset = RoundUpToAlignment(Offset, Align); MFI->setObjectOffset(i, -Offset); // Set the computed offset } @@ -504,7 +609,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; + Offset = RoundUpToAlignment(Offset, Align); MFI->setObjectOffset(i, Offset); Offset += MFI->getObjectSize(i); @@ -537,7 +642,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned Align = MFI->getLocalFrameMaxAlign(); // Adjust to alignment boundary. - Offset = (Offset + Align - 1) / Align * Align; + Offset = RoundUpToAlignment(Offset, Align); DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); @@ -656,8 +761,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + Offset = RoundUpToAlignment(Offset, StackAlign); } // Update frame info to pretend that this is part of the stack... @@ -674,21 +778,18 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); // Add prologue to the function... - TFI.emitPrologue(Fn); + TFI.emitPrologue(Fn, *SaveBlock); - // Add epilogue to restore the callee-save registers in each exiting block - for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { - // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().isReturn()) - TFI.emitEpilogue(Fn, *I); - } + // Add epilogue to restore the callee-save registers in each exiting block. + for (MachineBasicBlock *RestoreBlock : RestoreBlocks) + TFI.emitEpilogue(Fn, *RestoreBlock); // Emit additional code that is required to support segmented stacks, if // we've been asked for it. 
This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack // space in small chunks instead of one large contiguous block. if (Fn.shouldSplitStack()) - TFI.adjustForSegmentedStacks(Fn); + TFI.adjustForSegmentedStacks(Fn, *SaveBlock); // Emit additional code that is required to explicitly handle the stack in // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The @@ -696,7 +797,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // different conditional check and another BIF for allocating more stack // space. if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) - TFI.adjustForHiPEPrologue(Fn); + TFI.adjustForHiPEPrologue(Fn, *SaveBlock); } /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical @@ -706,6 +807,25 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); if (!TFI.needsFrameIndexResolution(Fn)) return; + MachineModuleInfo &MMI = Fn.getMMI(); + const Function *F = Fn.getFunction(); + const Function *ParentF = MMI.getWinEHParent(F); + unsigned FrameReg; + if (F == ParentF) { + WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction()); + // FIXME: This should be unconditional but we have bugs in the preparation + // pass. + if (FuncInfo.UnwindHelpFrameIdx != INT_MAX) + FuncInfo.UnwindHelpFrameOffset = TFI.getFrameIndexReferenceFromSP( + Fn, FuncInfo.UnwindHelpFrameIdx, FrameReg); + } else if (MMI.hasWinEHFuncInfo(F)) { + WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction()); + auto I = FuncInfo.CatchHandlerParentFrameObjIdx.find(F); + if (I != FuncInfo.CatchHandlerParentFrameObjIdx.end()) + FuncInfo.CatchHandlerParentFrameObjOffset[F] = + TFI.getFrameIndexReferenceFromSP(Fn, I->second, FrameReg); + } + // Store SPAdj at exit of a basic block. SmallVector<int, 8> SPState; SPState.resize(Fn.getNumBlockIDs()); @@ -744,8 +864,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); - int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); - int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); @@ -811,17 +931,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, continue; } - // Frame allocations are target independent. Simply swap the index with - // the offset. - if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC) { - assert(TFI->hasFP(Fn) && "frame alloc requires FP"); - MachineOperand &FI = MI->getOperand(i); - unsigned Reg; - int FrameOffset = TFI->getFrameIndexReference(Fn, FI.getIndex(), Reg); - FI.ChangeToImmediate(FrameOffset); - continue; - } - // Some instructions (e.g. inline asm instructions) can have // multiple frame indices and/or cause eliminateFrameIndex // to insert more than one instruction. 
We need the register diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h deleted file mode 100644 index f88b8ef..0000000 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h +++ /dev/null @@ -1,78 +0,0 @@ -//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -*---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass is responsible for finalizing the functions frame layout, saving -// callee saved registers, and for emitting prolog & epilog code for the -// function. -// -// This pass must be run after register allocation. After this pass is -// executed, it is illegal to construct MO_FrameIndex operands. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H -#define LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SparseBitVector.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetRegisterInfo.h" - -namespace llvm { - class RegScavenger; - class MachineBasicBlock; - - class PEI : public MachineFunctionPass { - public: - static char ID; - PEI() : MachineFunctionPass(ID) { - initializePEIPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override; - - /// runOnMachineFunction - Insert prolog/epilog code and replace abstract - /// frame indexes with appropriate references. - /// - bool runOnMachineFunction(MachineFunction &Fn) override; - - private: - RegScavenger *RS; - - // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved - // stack frame indexes. - unsigned MinCSFrameIndex, MaxCSFrameIndex; - - // Entry and return blocks of the current function. - MachineBasicBlock* EntryBlock; - SmallVector<MachineBasicBlock*, 4> ReturnBlocks; - - // Flag to control whether to use the register scavenger to resolve - // frame index materialization registers. Set according to - // TRI->requiresFrameIndexScavenging() for the curren function. - bool FrameIndexVirtualScavenging; - - void calculateSets(MachineFunction &Fn); - void calculateCallsInformation(MachineFunction &Fn); - void calculateCalleeSavedRegisters(MachineFunction &Fn); - void insertCSRSpillsAndRestores(MachineFunction &Fn); - void calculateFrameObjectOffsets(MachineFunction &Fn); - void replaceFrameIndices(MachineFunction &Fn); - void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, - int &SPAdj); - void scavengeFrameVirtualRegs(MachineFunction &Fn); - void insertPrologEpilogCode(MachineFunction &Fn); - - // Convenience for recognizing return blocks. 
- bool isReturnBlock(MachineBasicBlock* MBB); - }; -} // End llvm namespace -#endif diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp index 6b346f4..16ff48e 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp @@ -27,6 +27,7 @@ #endif #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index c621414..fd3d4d7 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -301,13 +301,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, const MDNode *Expr = DBG->getDebugExpression(); bool IsIndirect = DBG->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0; - DebugLoc DL; - if (MI == MBB->end()) { - // If MI is at basic block end then use last instruction's location. - MachineBasicBlock::iterator EI = MI; - DL = (--EI)->getDebugLoc(); - } else - DL = MI->getDebugLoc(); + DebugLoc DL = DBG->getDebugLoc(); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); MachineInstr *NewDV = BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(FI) @@ -877,6 +873,9 @@ void RAFast::AllocateBasicBlock() { const MDNode *Expr = MI->getDebugExpression(); DebugLoc DL = MI->getDebugLoc(); MachineBasicBlock *MBB = MI->getParent(); + assert( + cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(SS) diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index edc3294..26f42c9 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -538,8 +538,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Giant live ranges fall back to the global assignment heuristic, which // prevents excessive spilling in pathological cases. bool ReverseLocal = TRI->reverseLocalAssignment(); + const TargetRegisterClass &RC = *MRI->getRegClass(Reg); bool ForceGlobal = !ReverseLocal && - (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs()); + (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs()); if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && LIS->intervalIsInOneMBB(*LI)) { @@ -552,10 +553,10 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Allocating bottom up may allow many short LRGs to be assigned first // to one of the cheap registers. This could be much faster for very // large blocks on targets with many physical registers. - Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex()); + Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex()); } - } - else { + Prio |= RC.AllocationPriority << 24; + } else { // Allocate global and split ranges in long->short order. Long ranges that // don't fit should be spilled (or split) ASAP so they don't create // interference. Mark a bit to prioritize global above local ranges. 
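The RAGreedy::enqueue change above ORs the register class's AllocationPriority into bits 24 and up of the queue key, so class priority dominates the size/position heuristic packed into the low bits. A small self-contained sketch of that bit-packing — the constants and the 24-bit split are illustrative, not RAGreedy's exact code:

#include <cstdint>
#include <queue>
#include <utility>
#include <vector>

// Pack a base heuristic into the low 24 bits and the class priority into
// the high bits, so the class priority wins any comparison.
static uint32_t makePriority(uint32_t BasePrio, uint8_t ClassPrio) {
  return (BasePrio & 0x00FFFFFFu) | (uint32_t(ClassPrio) << 24);
}

int main() {
  // Max-heap keyed on the packed priority; higher values pop first.
  std::priority_queue<std::pair<uint32_t, unsigned>> Queue;
  Queue.push({makePriority(1000, 0), /*vreg=*/1});
  Queue.push({makePriority(10, 1), /*vreg=*/2});
  // vreg 2 pops first: its class priority outweighs the larger base value.
  return Queue.top().second == 2 ? 0 : 1;
}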
@@ -1554,7 +1555,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); - const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC); + const TargetRegisterClass *SuperRC = + TRI->getLargestLegalSuperClass(CurRC, *MF); unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); // Split around every non-copy instruction if this split will relax // the constraints on the virtual register. diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index eb7e563..eeff73d 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -126,7 +126,12 @@ private: void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS); /// \brief Constructs an initial graph. - void initializeGraph(PBQPRAGraph &G); + void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller); + + /// \brief Spill the given VReg. + void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals, + MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM, + Spiller &VRegSpiller); /// \brief Given a solved PBQP problem maps this solution back to a register /// assignment. @@ -172,11 +177,41 @@ public: class Interference : public PBQPRAConstraint { private: -private: - typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr; - typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IMatrixKey; - typedef DenseMap<IMatrixKey, PBQPRAGraph::MatrixPtr> IMatrixCache; + typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey; + typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache; + typedef DenseSet<IKey> DisjointAllowedRegsCache; + typedef std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId> IEdgeKey; + typedef DenseSet<IEdgeKey> IEdgeCache; + + bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, + PBQPRAGraph::NodeId MId, + const DisjointAllowedRegsCache &D) const { + const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs(); + const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs(); + + if (NRegs == MRegs) + return false; + + if (NRegs < MRegs) + return D.count(IKey(NRegs, MRegs)) > 0; + + return D.count(IKey(MRegs, NRegs)) > 0; + } + + void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, + PBQPRAGraph::NodeId MId, + DisjointAllowedRegsCache &D) { + const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs(); + const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs(); + + assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself"); + + if (NRegs < MRegs) + D.insert(IKey(NRegs, MRegs)); + else + D.insert(IKey(MRegs, NRegs)); + } // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required // for the fast interference graph construction algorithm. The last is there @@ -244,6 +279,13 @@ public: // and uniquing them. IMatrixCache C; + // Finding an edge is expensive in the worst case (O(max_clique(G))). So + // cache locally edges we have already seen. 
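// A minimal generic sketch of the normalization both caches above rely on:
// ordering the two keys before any lookup or insert makes the cached pair
// symmetric, so (A, B) and (B, A) always hit the same entry. std::pair
// stands in for the IKey/IEdgeKey typedefs here.
#include <utility>
template <typename T>
static std::pair<T, T> makeSymmetricKey(const T &A, const T &B) {
  return (A < B) ? std::make_pair(A, B) : std::make_pair(B, A);
}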
+ IEdgeCache EC;
+
+ // Cache known disjoint allowed register pairs
+ DisjointAllowedRegsCache D;
+
 typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet;
 typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
 decltype(&lowestStartPoint)> IntervalQueue;
@@ -287,14 +329,21 @@ public:
 for (const auto &A : Active) {
 PBQP::GraphBase::NodeId MId = getNodeId(A);
+ // Do not add an edge when the nodes' allowed registers do not
+ // intersect: there is obviously no interference.
+ if (haveDisjointAllowedRegs(G, NId, MId, D))
+ continue;
+
 // Check that we haven't already added this edge
- // FIXME: findEdge is expensive in the worst case (O(max_clique(G))).
- // It might be better to replace this with a local bit-matrix.
- if (G.findEdge(NId, MId) != PBQPRAGraph::invalidEdgeId())
+ IEdgeKey EK(std::min(NId, MId), std::max(NId, MId));
+ if (EC.count(EK))
 continue;
 // This is a new edge - add it to the graph.
- createInterferenceEdge(G, NId, MId, C);
+ if (!createInterferenceEdge(G, NId, MId, C))
+ setDisjointAllowedRegs(G, NId, MId, D);
+ else
+ EC.insert(EK);
 }
 // Finally, add Cur to the Active set.
@@ -304,35 +353,48 @@ public:
 private:
- void createInterferenceEdge(PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
- PBQPRAGraph::NodeId MId, IMatrixCache &C) {
+ // Create an Interference edge and add it to the graph, unless it is
+ // a null matrix, meaning the nodes' allowed registers do not have any
+ // interference. This case occurs frequently between integer and floating
+ // point registers for example.
+ // Return true iff both nodes interfere.
+ bool createInterferenceEdge(PBQPRAGraph &G,
+ PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId,
+ IMatrixCache &C) {
 const TargetRegisterInfo &TRI =
- *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
-
+ *G.getMetadata().MF.getSubtarget().getRegisterInfo();
 const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
 const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
 // Try looking the edge costs up in the IMatrixCache first.
- IMatrixKey K(&NRegs, &MRegs);
+ IKey K(&NRegs, &MRegs);
 IMatrixCache::iterator I = C.find(K);
 if (I != C.end()) {
 G.addEdgeBypassingCostAllocator(NId, MId, I->second);
- return;
+ return true;
 }
 PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
+ bool NodesInterfere = false;
 for (unsigned I = 0; I != NRegs.size(); ++I) {
 unsigned PRegN = NRegs[I];
 for (unsigned J = 0; J != MRegs.size(); ++J) {
 unsigned PRegM = MRegs[J];
- if (TRI.regsOverlap(PRegN, PRegM))
+ if (TRI.regsOverlap(PRegN, PRegM)) {
 M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ NodesInterfere = true;
+ }
 }
 }
+ if (!NodesInterfere)
+ return false;
+
 PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
 C[K] = G.getEdgeCostsPtr(EId);
+
+ return true;
 }
 };
@@ -342,7 +404,7 @@ public:
 void apply(PBQPRAGraph &G) override {
 MachineFunction &MF = G.getMetadata().MF;
 MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
- CoalescerPair CP(*MF.getTarget().getSubtargetImpl()->getRegisterInfo());
+ CoalescerPair CP(*MF.getSubtarget().getRegisterInfo());
 // Scan the machine function and add a coalescing cost whenever CoalescerPair
 // gives the OK.
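// A minimal self-contained sketch of the null-matrix early-out that
// createInterferenceEdge above implements; plain vectors stand in for the
// PBQP matrix type, and regsOverlap is passed in as a callback:
#include <limits>
#include <vector>
static bool buildInterferenceCosts(const std::vector<unsigned> &NRegs,
                                   const std::vector<unsigned> &MRegs,
                                   bool (*RegsOverlap)(unsigned, unsigned),
                                   std::vector<std::vector<double>> &M) {
  M.assign(NRegs.size() + 1, std::vector<double>(MRegs.size() + 1, 0.0));
  bool NodesInterfere = false;
  for (unsigned I = 0; I != NRegs.size(); ++I)
    for (unsigned J = 0; J != MRegs.size(); ++J)
      if (RegsOverlap(NRegs[I], MRegs[J])) {
        M[I + 1][J + 1] = std::numeric_limits<double>::infinity();
        NodesInterfere = true;
      }
  return NodesInterfere; // An all-zero matrix carries no edge worth adding.
}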
@@ -398,7 +460,7 @@ public:
 }
 PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId));
 addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
- G.setEdgeCosts(EId, std::move(Costs));
+ G.updateEdgeCosts(EId, std::move(Costs));
 }
 }
 }
@@ -488,15 +550,21 @@ static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
 return false;
 }
-void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) {
+void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
+ Spiller &VRegSpiller) {
 MachineFunction &MF = G.getMetadata().MF;
 LiveIntervals &LIS = G.getMetadata().LIS;
 const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
 const TargetRegisterInfo &TRI =
- *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+ *G.getMetadata().MF.getSubtarget().getRegisterInfo();
+
+ std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
+
+ while (!Worklist.empty()) {
+ unsigned VReg = Worklist.back();
+ Worklist.pop_back();
- for (auto VReg : VRegsToAlloc) {
 const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
 LiveInterval &VRegLI = LIS.getInterval(VReg);
@@ -531,6 +599,15 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) {
 VRegAllowed.push_back(PReg);
 }
+ // Check for vregs that have no allowed registers. These should be
+ // pre-spilled and the new vregs added to the worklist.
+ if (VRegAllowed.empty()) {
+ SmallVector<unsigned, 8> NewVRegs;
+ spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
+ Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
+ continue;
+ }
+
 PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
 // Tweak cost of callee saved registers, as using them forces spilling and
@@ -547,14 +624,40 @@
 }
 }
+void RegAllocPBQP::spillVReg(unsigned VReg,
+ SmallVectorImpl<unsigned> &NewIntervals,
+ MachineFunction &MF, LiveIntervals &LIS,
+ VirtRegMap &VRM, Spiller &VRegSpiller) {
+
+ VRegsToAlloc.erase(VReg);
+ LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM);
+ VRegSpiller.spill(LRE);
+
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ (void)TRI;
+ DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: "
+ << LRE.getParent().weight << ", New vregs: ");
+
+ // Copy any newly inserted live intervals into the list of regs to
+ // allocate.
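// A minimal generic sketch (hypothetical callables, not RegAllocPBQP's
// exact code) of the worklist discipline initializeGraph uses above: a vreg
// with no allowed registers is pre-spilled, and the spiller's replacement
// vregs are pushed back so a later iteration processes them too.
#include <functional>
#include <vector>
static void drainVRegWorklist(
    std::vector<unsigned> Worklist,
    const std::function<bool(unsigned)> &HasAllowedRegs,
    const std::function<std::vector<unsigned>(unsigned)> &Spill,
    const std::function<void(unsigned)> &AddGraphNode) {
  while (!Worklist.empty()) {
    unsigned VReg = Worklist.back();
    Worklist.pop_back();
    if (!HasAllowedRegs(VReg)) {
      std::vector<unsigned> NewVRegs = Spill(VReg);
      Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
      continue;
    }
    AddGraphNode(VReg); // Build the PBQP node for this vreg.
  }
}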
+ for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end(); + I != E; ++I) { + const LiveInterval &LI = LIS.getInterval(*I); + assert(!LI.empty() && "Empty spill range."); + DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " "); + VRegsToAlloc.insert(LI.reg); + } + + DEBUG(dbgs() << ")\n"); +} + bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, const PBQP::Solution &Solution, VirtRegMap &VRM, Spiller &VRegSpiller) { MachineFunction &MF = G.getMetadata().MF; LiveIntervals &LIS = G.getMetadata().LIS; - const TargetRegisterInfo &TRI = - *MF.getTarget().getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); (void)TRI; // Set to true if we have any spills @@ -576,28 +679,11 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, assert(PReg != 0 && "Invalid preg selected."); VRM.assignVirt2Phys(VReg, PReg); } else { - VRegsToAlloc.erase(VReg); - SmallVector<unsigned, 8> NewSpills; - LiveRangeEdit LRE(&LIS.getInterval(VReg), NewSpills, MF, LIS, &VRM); - VRegSpiller.spill(LRE); - - DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: " - << LRE.getParent().weight << ", New vregs: "); - - // Copy any newly inserted live intervals into the list of regs to - // allocate. - for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end(); - I != E; ++I) { - LiveInterval &LI = LIS.getInterval(*I); - assert(!LI.empty() && "Empty spill range."); - DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " "); - VRegsToAlloc.insert(LI.reg); - } - - DEBUG(dbgs() << ")\n"); - - // We need another round if spill intervals were added. - AnotherRoundNeeded |= !LRE.empty(); + // Spill VReg. If this introduces new intervals we'll need another round + // of allocation. + SmallVector<unsigned, 8> NewVRegs; + spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); + AnotherRoundNeeded |= !NewVRegs.empty(); } } @@ -670,7 +756,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // If there are non-empty intervals allocate them using pbqp. 
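// A minimal sketch (hypothetical helper, mirroring the loop that follows)
// of the round-based driver: each round builds and solves one PBQP graph,
// and allocation repeats while mapping the solution back spills anything new.
#include <functional>
static void runAllocationRounds(const std::function<bool(unsigned)> &SolveRound) {
  // SolveRound returns true when its spills created new vregs that a
  // further round must allocate.
  unsigned Round = 0;
  while (SolveRound(Round))
    ++Round;
}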
if (!VRegsToAlloc.empty()) { - const TargetSubtargetInfo &Subtarget = *MF.getTarget().getSubtargetImpl(); + const TargetSubtargetInfo &Subtarget = MF.getSubtarget(); std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot = llvm::make_unique<PBQPRAConstraintList>(); ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>()); @@ -686,7 +772,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n"); PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI)); - initializeGraph(G); + initializeGraph(G, VRM, *VRegSpiller); ConstraintsRoot->apply(G); #ifndef NDEBUG @@ -699,7 +785,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text); DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" << GraphFileName << "\"\n"); - G.dumpToStream(OS); + G.dump(OS); } #endif @@ -719,6 +805,79 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { return true; } +namespace { +// A helper class for printing node and register info in a consistent way +class PrintNodeInfo { +public: + typedef PBQP::RegAlloc::PBQPRAGraph Graph; + typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId; + + PrintNodeInfo(NodeId NId, const Graph &G) : G(G), NId(NId) {} + + void print(raw_ostream &OS) const { + const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + unsigned VReg = G.getNodeMetadata(NId).getVReg(); + const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); + OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')'; + } + +private: + const Graph &G; + NodeId NId; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeInfo &PR) { + PR.print(OS); + return OS; +} +} // anonymous namespace + +void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const { + for (auto NId : nodeIds()) { + const Vector &Costs = getNodeCosts(NId); + assert(Costs.getLength() != 0 && "Empty vector in graph."); + OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n'; + } + OS << '\n'; + + for (auto EId : edgeIds()) { + NodeId N1Id = getEdgeNode1Id(EId); + NodeId N2Id = getEdgeNode2Id(EId); + assert(N1Id != N2Id && "PBQP graphs should not have self-edges."); + const Matrix &M = getEdgeCosts(EId); + assert(M.getRows() != 0 && "No rows in matrix."); + assert(M.getCols() != 0 && "No cols in matrix."); + OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / "; + OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n"; + OS << M << '\n'; + } +} + +void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); } + +void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const { + OS << "graph {\n"; + for (auto NId : nodeIds()) { + OS << " node" << NId << " [ label=\"" + << PrintNodeInfo(NId, *this) << "\\n" + << getNodeCosts(NId) << "\" ]\n"; + } + + OS << " edge [ len=" << nodeIds().size() << " ]\n"; + for (auto EId : edgeIds()) { + OS << " node" << getEdgeNode1Id(EId) + << " -- node" << getEdgeNode2Id(EId) + << " [ label=\""; + const Matrix &EdgeCosts = getEdgeCosts(EId); + for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) { + OS << EdgeCosts.getRowAsVector(i) << "\\n"; + } + OS << "\" ]\n"; + } + OS << "}\n"; +} + FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) { return new RegAllocPBQP(customPassID); } diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp index 
ab33672..178fa18 100644 --- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -131,7 +131,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { RCI.NumRegs = StressRA; // Check if RC is a proper sub-class. - if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC)) + if (const TargetRegisterClass *Super = + TRI->getLargestLegalSuperClass(RC, *MF)) if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs) RCI.ProperSubClass = true; @@ -175,6 +176,6 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const { } compute(RC); unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC); - return TRI->getRegPressureSetLimit(Idx) - - TRI->getRegClassWeight(RC).RegWeight * NReserved; + return TRI->getRegPressureSetLimit(*MF, Idx) - + TRI->getRegClassWeight(RC).RegWeight * NReserved; } diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index b8cae4a..ac7d98f 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -58,12 +58,16 @@ EnableJoining("join-liveintervals", cl::desc("Coalesce copies (default=true)"), cl::init(true)); -// Temporary flag to test critical edge unsplitting. +static cl::opt<bool> UseTerminalRule("terminal-rule", + cl::desc("Apply the terminal rule"), + cl::init(false), cl::Hidden); + +/// Temporary flag to test critical edge unsplitting. static cl::opt<bool> EnableJoinSplits("join-splitedges", cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden); -// Temporary flag to test global copy optimization. +/// Temporary flag to test global copy optimization. static cl::opt<cl::boolOrDefault> EnableGlobalCopies("join-globalcopies", cl::desc("Coalesce copies that span blocks (default=subtarget)"), @@ -120,7 +124,7 @@ namespace { /// Recursively eliminate dead defs in DeadDefs. void eliminateDeadDefs(); - /// LiveRangeEdit callback. + /// LiveRangeEdit callback for eliminateDeadDefs(). void LRE_WillEraseInstruction(MachineInstr *MI) override; /// Coalesce the LocalWorkList. @@ -133,16 +137,15 @@ namespace { /// copies that cannot yet be coalesced into WorkList. void copyCoalesceInMBB(MachineBasicBlock *MBB); - /// Try to coalesce all copies in CurrList. Return - /// true if any progress was made. + /// Tries to coalesce all copies in CurrList. Returns true if any progress + /// was made. bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList); - /// Attempt to join intervals corresponding to SrcReg/DstReg, - /// which are the src/dst of the copy instruction CopyMI. This returns - /// true if the copy was successfully coalesced away. If it is not - /// currently possible to coalesce this interval, but it may be possible if - /// other things get coalesced, then it returns true by reference in - /// 'Again'. + /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the + /// src/dst of the copy instruction CopyMI. This returns true if the copy + /// was successfully coalesced away. If it is not currently possible to + /// coalesce this interval, but it may be possible if other things get + /// coalesced, then it returns true by reference in 'Again'. bool joinCopy(MachineInstr *TheCopy, bool &Again); /// Attempt to join these two intervals. On failure, this @@ -161,18 +164,20 @@ namespace { /// LaneMask are split as necessary. @p LaneMask are the lanes that /// @p ToMerge will occupy in the coalescer register. 
@p LI has its subrange
 /// lanemasks already adjusted to the coalesced register.
- void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
+ /// @returns false if live range conflicts could not be resolved.
+ bool mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
 unsigned LaneMask, CoalescerPair &CP);
 /// Join the live ranges of two subregisters. Joins @p RRange into
 /// @p LRange, @p RRange may be invalid afterwards.
- void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+ /// @returns false if live range conflicts could not be resolved.
+ bool joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
 unsigned LaneMask, const CoalescerPair &CP);
- /// We found a non-trivially-coalescable copy. If
- /// the source value number is defined by a copy from the destination reg
- /// see if we can merge these two destination reg valno# into a single
- /// value number, eliminating a copy.
+ /// We found a non-trivially-coalescable copy. If the source value number is
+ /// defined by a copy from the destination reg, see if we can merge these two
+ /// destination reg valno# into a single value number, eliminating a copy.
+ /// This returns true if an interval was modified.
 bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
 /// Return true if there are definitions of IntB
@@ -184,28 +189,69 @@ namespace {
 /// If the source value number is defined by a commutable instruction and
 /// its other operand is coalesced to the copy dest register, see if we
 /// can transform the copy into a noop by commuting the definition.
+ /// This returns true if an interval was modified.
 bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
 /// If the source of a copy is defined by a
 /// trivial computation, replace the copy by rematerializing the definition.
- bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI,
+ bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
 bool &IsDefCopy);
- /// Return true if a physreg copy should be joined.
+ /// Return true if a copy involving a physreg should be joined.
 bool canJoinPhys(const CoalescerPair &CP);
- /// Replace all defs and uses of SrcReg to DstReg and
- /// update the subregister number if it is not zero. If DstReg is a
- /// physical register and the existing subregister number of the def / use
- /// being updated is not zero, make sure to set it to the correct physical
- /// subregister.
+ /// Replace all defs and uses of SrcReg to DstReg and update the subregister
+ /// number if it is not zero. If DstReg is a physical register and the
+ /// existing subregister number of the def / use being updated is not zero,
+ /// make sure to set it to the correct physical subregister.
 void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
 /// Handle copies of undef values.
+ /// Returns true if @p CopyMI was a copy of an undef value and eliminated.
 bool eliminateUndefCopy(MachineInstr *CopyMI);
+ /// Check whether or not we should apply the terminal rule on the
+ /// destination (Dst) of \p Copy.
+ /// When the terminal rule applies, Copy is not profitable to
+ /// coalesce.
+ /// Dst is terminal if it has exactly one affinity (Dst, Src) and
+ /// at least one interference (Dst, Dst2). If Dst is terminal, the
+ /// terminal rule consists in checking that at least one of the
+ /// interfering nodes, say Dst2, has an affinity of equal or greater
+ /// weight with Src.
+ /// In that case, Dst2 and Dst will not both be able to be coalesced
+ /// with Src.
Since Dst2 exposes more coalescing opportunities than
+ /// Dst, we can drop \p Copy.
+ bool applyTerminalRule(const MachineInstr &Copy) const;
+
+ /// Check whether or not \p LI is composed of multiple connected
+ /// components and if that is the case, fix that.
+ void splitNewRanges(LiveInterval *LI) {
+ ConnectedVNInfoEqClasses ConEQ(*LIS);
+ unsigned NumComps = ConEQ.Classify(LI);
+ if (NumComps <= 1)
+ return;
+ SmallVector<LiveInterval*, 8> NewComps(1, LI);
+ for (unsigned i = 1; i != NumComps; ++i) {
+ unsigned VReg = MRI->createVirtualRegister(MRI->getRegClass(LI->reg));
+ NewComps.push_back(&LIS->createEmptyInterval(VReg));
+ }
+
+ ConEQ.Distribute(&NewComps[0], *MRI);
+ }
+
+ /// Wrapper method for \see LiveIntervals::shrinkToUses.
+ /// This method does the proper fixing of the live ranges when the
+ /// aforementioned method returns true.
+ void shrinkToUses(LiveInterval *LI,
+ SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
+ if (LIS->shrinkToUses(LI, Dead))
+ // We may have created multiple connected components, split them.
+ splitNewRanges(LI);
+ }
+
 public:
- static char ID; // Class identification, replacement for typeinfo
+ static char ID; ///< Class identification, replacement for typeinfo
 RegisterCoalescer() : MachineFunctionPass(ID) {
 initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
 }
@@ -220,7 +266,7 @@ namespace {
 /// Implement the dump method.
 void print(raw_ostream &O, const Module* = nullptr) const override;
 };
-} /// end anonymous namespace
+} // end anonymous namespace
 char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
@@ -254,11 +300,11 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
 return true;
 }
-// Return true if this block should be vacated by the coalescer to eliminate
-// branches. The important cases to handle in the coalescer are critical edges
-// split during phi elimination which contain only copies. Simple blocks that
-// contain non-branches should also be vacated, but this can be handled by an
-// earlier pass similar to early if-conversion.
+/// Return true if this block should be vacated by the coalescer to eliminate
+/// branches. The important cases to handle in the coalescer are critical edges
+/// split during phi elimination which contain only copies. Simple blocks that
+/// contain non-branches should also be vacated, but this can be handled by an
+/// earlier pass similar to early if-conversion.
 static bool isSplitEdge(const MachineBasicBlock *MBB) {
 if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
 return false;
@@ -423,27 +469,11 @@ void RegisterCoalescer::eliminateDeadDefs() {
 nullptr, this).eliminateDeadDefs(DeadDefs);
 }
-// Callback from eliminateDeadDefs().
 void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
 // MI may be in WorkList. Make sure we don't visit it.
 ErasedInstrs.insert(MI);
 }
-/// We found a non-trivially-coalescable copy with IntA
-/// being the source and IntB being the dest, thus this defines a value number
-/// in IntB. If the source value number (in IntA) is defined by a copy from B,
-/// see if we can merge these two pieces of B into a single value number,
-/// eliminating a copy. For example:
-///
-/// A3 = B0
-/// ...
-/// B1 = A3 <- this copy
-///
-/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
-/// value number to be replaced with B0 (which simplifies the B liveinterval).
-///
-/// This returns true if an interval was modified.
-/// bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI) { assert(!CP.isPartial() && "This doesn't work for partial copies."); @@ -455,6 +485,20 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); + // We have a non-trivially-coalescable copy with IntA being the source and + // IntB being the dest, thus this defines a value number in IntB. If the + // source value number (in IntA) is defined by a copy from B, see if we can + // merge these two pieces of B into a single value number, eliminating a copy. + // For example: + // + // A3 = B0 + // ... + // B1 = A3 <- this copy + // + // In this case, B0 can be extended to where the B1 copy lives, allowing the + // B1 value number to be replaced with B0 (which simplifies the B + // liveinterval). + // BValNo is a value number in B that is defined by a copy from A. 'B1' in // the example above. LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx); @@ -538,14 +582,12 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // will also add the isKill marker. CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); if (AS->end == CopyIdx) - LIS->shrinkToUses(&IntA); + shrinkToUses(&IntA); ++numExtends; return true; } -/// Return true if there are definitions of IntB -/// other than BValNo val# that can reach uses of AValno val# of IntA. bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, VNInfo *AValNo, @@ -585,29 +627,6 @@ static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, } } -/// We found a non-trivially-coalescable copy with -/// IntA being the source and IntB being the dest, thus this defines a value -/// number in IntB. If the source value number (in IntA) is defined by a -/// commutable instruction and its other operand is coalesced to the copy dest -/// register, see if we can transform the copy into a noop by commuting the -/// definition. For example, -/// -/// A3 = op A2 B0<kill> -/// ... -/// B1 = A3 <- this copy -/// ... -/// = op A3 <- more uses -/// -/// ==> -/// -/// B2 = op B0 A2<kill> -/// ... -/// B1 = B2 <- now an identity copy -/// ... -/// = op B2 <- more uses -/// -/// This returns true if an interval was modified. -/// bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *CopyMI) { assert(!CP.isPhys()); @@ -617,6 +636,26 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, LiveInterval &IntB = LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + // We found a non-trivially-coalescable copy with IntA being the source and + // IntB being the dest, thus this defines a value number in IntB. If the + // source value number (in IntA) is defined by a commutable instruction and + // its other operand is coalesced to the copy dest register, see if we can + // transform the copy into a noop by commuting the definition. For example, + // + // A3 = op A2 B0<kill> + // ... + // B1 = A3 <- this copy + // ... + // = op A3 <- more uses + // + // ==> + // + // B2 = op B0 A2<kill> + // ... + // B1 = B2 <- now an identity copy + // ... + // = op B2 <- more uses + // BValNo is a value number in B that is defined by a copy from A. 'B1' in // the example above. 
SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); @@ -745,15 +784,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, continue; DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); assert(DVNI->def == DefIdx); - BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); + BValNo = IntB.MergeValueNumberInto(DVNI, BValNo); for (LiveInterval::SubRange &S : IntB.subranges()) { VNInfo *SubDVNI = S.getVNInfoAt(DefIdx); if (!SubDVNI) continue; VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); assert(SubBValNo->def == CopyIdx); - VNInfo *Merged = S.MergeValueNumberInto(SubBValNo, SubDVNI); - Merged->def = CopyIdx; + S.MergeValueNumberInto(SubDVNI, SubBValNo); } ErasedInstrs.insert(UseMI); @@ -809,7 +847,6 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator); addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo); } - SA.removeValNo(ASubValNo); } } @@ -817,25 +854,30 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, addSegmentsWithValNo(IntB, BValNo, IntA, AValNo); DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); - IntA.removeValNo(AValNo); - // Remove valuenos in subranges (the A+B have subranges case has already been - // handled above) - if (!IntB.hasSubRanges()) { - SlotIndex AIdx = CopyIdx.getRegSlot(true); - for (LiveInterval::SubRange &SA : IntA.subranges()) { - VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); - assert(ASubValNo != nullptr); - SA.removeValNo(ASubValNo); - } - } + LIS->removeVRegDefAt(IntA, AValNo->def); + DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); ++numCommutes; return true; } -/// If the source of a copy is defined by a trivial -/// computation, replace the copy by rematerialize the definition. -bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, +/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just +/// defining a subregister. +static bool definesFullReg(const MachineInstr &MI, unsigned Reg) { + assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && + "This code cannot handle physreg aliasing"); + for (const MachineOperand &Op : MI.operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + // Return true if we define the full register or don't care about the value + // inside other subregisters. 
+ if (Op.getSubReg() == 0 || Op.isUndef()) + return true; + } + return false; +} + +bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI, bool &IsDefCopy) { IsDefCopy = false; @@ -863,8 +905,10 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, return false; if (!TII->isTriviallyReMaterializable(DefMI, AA)) return false; + if (!definesFullReg(*DefMI, SrcReg)) + return false; bool SawStore = false; - if (!DefMI->isSafeToMove(TII, AA, SawStore)) + if (!DefMI->isSafeToMove(AA, SawStore)) return false; const MCInstrDesc &MCID = DefMI->getDesc(); if (MCID.getNumDefs() != 1) @@ -911,6 +955,28 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, DefMI, *TRI); MachineInstr *NewMI = std::prev(MII); + // In a situation like the following: + // %vreg0:subreg = instr ; DefMI, subreg = DstIdx + // %vreg1 = copy %vreg0:subreg ; CopyMI, SrcIdx = 0 + // instead of widening %vreg1 to the register class of %vreg0 simply do: + // %vreg1 = instr + const TargetRegisterClass *NewRC = CP.getNewRC(); + if (DstIdx != 0) { + MachineOperand &DefMO = NewMI->getOperand(0); + if (DefMO.getSubReg() == DstIdx) { + assert(SrcIdx == 0 && CP.isFlipped() + && "Shouldn't have SrcIdx+DstIdx at this point"); + const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); + const TargetRegisterClass *CommonRC = + TRI->getCommonSubClass(DefRC, DstRC); + if (CommonRC != nullptr) { + NewRC = CommonRC; + DstIdx = 0; + DefMO.setSubReg(0); + } + } + } + LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); CopyMI->eraseFromParent(); ErasedInstrs.insert(CopyMI); @@ -922,23 +988,23 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, for (unsigned i = NewMI->getDesc().getNumOperands(), e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); - if (MO.isReg()) { - assert(MO.isDef() && MO.isImplicit() && MO.isDead() && + if (MO.isReg() && MO.isDef()) { + assert(MO.isImplicit() && MO.isDead() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())); NewMIImplDefs.push_back(MO.getReg()); } } if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - const TargetRegisterClass *NewRC = CP.getNewRC(); unsigned NewIdx = NewMI->getOperand(0).getSubReg(); - if (NewIdx) - NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx); - else - NewRC = TRI->getCommonSubClass(NewRC, DefRC); - - assert(NewRC && "subreg chosen for remat incompatible with instruction"); + if (DefRC != nullptr) { + if (NewIdx) + NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx); + else + NewRC = TRI->getCommonSubClass(NewRC, DefRC); + assert(NewRC && "subreg chosen for remat incompatible with instruction"); + } MRI->setRegClass(DstReg, NewRC); updateRegDefsUses(DstReg, DstReg, DstIdx); @@ -1005,7 +1071,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, ++NumReMats; // The source interval can become smaller because we removed a use. - LIS->shrinkToUses(&SrcInt, &DeadDefs); + shrinkToUses(&SrcInt, &DeadDefs); if (!DeadDefs.empty()) { // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs // to describe DstReg instead. 
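// A minimal set-based sketch of the class-narrowing decision made above for
// rematerialization: plain sets of register ids stand in for register
// classes, and an empty intersection means no common subclass exists, so the
// original class and subregister index are kept instead.
#include <algorithm>
#include <iterator>
#include <set>
static std::set<unsigned> commonSubClassSketch(const std::set<unsigned> &A,
                                               const std::set<unsigned> &B) {
  std::set<unsigned> Common;
  std::set_intersection(A.begin(), A.end(), B.begin(), B.end(),
                        std::inserter(Common, Common.begin()));
  return Common;
}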
@@ -1022,21 +1088,15 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
 return true;
 }
-static void removeUndefValue(LiveRange &LR, SlotIndex At)
-{
- VNInfo *VNInfo = LR.getVNInfoAt(At);
- assert(VNInfo != nullptr && SlotIndex::isSameInstr(VNInfo->def, At));
- LR.removeValNo(VNInfo);
-}
-
-/// ProcessImplicitDefs may leave some copies of <undef>
-/// values; it only removes local variables. When we have a copy like:
-///
-/// %vreg1 = COPY %vreg2<undef>
-///
-/// We delete the copy and remove the corresponding value number from %vreg1.
-/// Any uses of that value number are marked as <undef>.
 bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
+ // ProcessImplicitDefs may leave some copies of <undef> values; it only removes
+ // local variables. When we have a copy like:
+ //
+ // %vreg1 = COPY %vreg2<undef>
+ //
+ // We delete the copy and remove the corresponding value number from %vreg1.
+ // Any uses of that value number are marked as <undef>.
+
 // Note that we do not query CoalescerPair here but redo isMoveInstr as the
 // CoalescerPair may have a new register class with adjusted subreg indices
 // at this point.
@@ -1061,22 +1121,25 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
 // Remove any DstReg segments starting at the instruction.
 LiveInterval &DstLI = LIS->getInterval(DstReg);
- unsigned DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
 SlotIndex RegIndex = Idx.getRegSlot();
- for (LiveInterval::SubRange &SR : DstLI.subranges()) {
- if ((SR.LaneMask & DstMask) == 0)
- continue;
- removeUndefValue(SR, RegIndex);
-
- DstLI.removeEmptySubRanges();
- }
 // Remove value or merge with previous one in case of a subregister def.
 if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
- VNInfo *VNInfo = DstLI.getVNInfoAt(RegIndex);
- DstLI.MergeValueNumberInto(VNInfo, PrevVNI);
- } else {
- removeUndefValue(DstLI, RegIndex);
- }
+ VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
+ DstLI.MergeValueNumberInto(VNI, PrevVNI);
+
+ // The affected subregister segments can be removed.
+ unsigned DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
+ for (LiveInterval::SubRange &SR : DstLI.subranges()) {
+ if ((SR.LaneMask & DstMask) == 0)
+ continue;
+
+ VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
+ assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
+ SR.removeValNo(SVNI);
+ }
+ DstLI.removeEmptySubRanges();
+ } else
+ LIS->removeVRegDefAt(DstLI, RegIndex);
 // Mark uses as undef.
 for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
@@ -1106,10 +1169,6 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
 return true;
 }
-/// Replace all defs and uses of SrcReg to DstReg and update the subregister
-/// number if it is not zero. If DstReg is a physical register and the existing
-/// subregister number of the def / use being updated is not zero, make sure to
-/// set it to the correct physical subregister.
 void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
 unsigned DstReg,
 unsigned SubIdx) {
@@ -1151,7 +1210,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
 // A subreg use of a partially undef (super) register may be a complete
 // undef use now and then has to be marked that way.
- if (SubIdx != 0 && MO.isUse() && MRI->tracksSubRegLiveness()) { + if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { if (!DstInt->hasSubRanges()) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); @@ -1198,29 +1257,23 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, } } -/// Return true if a copy involving a physreg should be joined. bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { - /// Always join simple intervals that are defined by a single copy from a - /// reserved register. This doesn't increase register pressure, so it is - /// always beneficial. + // Always join simple intervals that are defined by a single copy from a + // reserved register. This doesn't increase register pressure, so it is + // always beneficial. if (!MRI->isReserved(CP.getDstReg())) { DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); return false; } LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); - if (CP.isFlipped() && JoinVInt.containsOneValue()) + if (JoinVInt.containsOneValue()) return true; - DEBUG(dbgs() << "\tCannot join defs into reserved register.\n"); + DEBUG(dbgs() << "\tCannot join complex intervals into reserved register.\n"); return false; } -/// Attempt to join intervals corresponding to SrcReg/DstReg, -/// which are the src/dst of the copy instruction CopyMI. This returns true -/// if the copy was successfully coalesced away. If it is not currently -/// possible to coalesce this interval, but it may be possible if other -/// things get coalesced, then it returns true by reference in 'Again'. bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { Again = false; @@ -1399,7 +1452,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { } if (ShrinkMainRange) { LiveInterval &LI = LIS->getInterval(CP.getDstReg()); - LIS->shrinkToUses(&LI); + shrinkToUses(&LI); } // SrcReg is guaranteed to be the register whose live interval that is @@ -1407,7 +1460,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { LIS->removeInterval(CP.getSrcReg()); // Update regalloc hint. - TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); + TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); DEBUG({ dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx()) @@ -1424,24 +1477,23 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { return true; } -/// Attempt joining with a reserved physreg. bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { + unsigned DstReg = CP.getDstReg(); assert(CP.isPhys() && "Must be a physreg copy"); - assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register"); + assert(MRI->isReserved(DstReg) && "Not a reserved register"); LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); - assert(CP.isFlipped() && RHS.containsOneValue() && - "Invalid join with reserved register"); + assert(RHS.containsOneValue() && "Invalid join with reserved register"); // Optimization for reserved registers like ESP. We can only merge with a - // reserved physreg if RHS has a single value that is a copy of CP.DstReg(). + // reserved physreg if RHS has a single value that is a copy of DstReg. // The live range of the reserved register will look like a set of dead defs // - we don't properly track the live range of reserved registers. // Deny any overlapping intervals. 
This depends on all the reserved // register live ranges to look like dead defs. - for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI) + for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) if (RHS.overlaps(LIS->getRegUnit(*UI))) { DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); return false; @@ -1453,7 +1505,46 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // defs are there. // Delete the identity copy. - MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg); + MachineInstr *CopyMI; + if (CP.isFlipped()) { + CopyMI = MRI->getVRegDef(RHS.reg); + } else { + if (!MRI->hasOneNonDBGUse(RHS.reg)) { + DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); + return false; + } + + MachineInstr *DestMI = MRI->getVRegDef(RHS.reg); + CopyMI = &*MRI->use_instr_nodbg_begin(RHS.reg); + const SlotIndex CopyRegIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); + const SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot(); + + // We checked above that there are no interfering defs of the physical + // register. However, for this case, where we intent to move up the def of + // the physical register, we also need to check for interfering uses. + SlotIndexes *Indexes = LIS->getSlotIndexes(); + for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx); + SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) { + MachineInstr *MI = LIS->getInstructionFromIndex(SI); + if (MI->readsRegister(DstReg, TRI)) { + DEBUG(dbgs() << "\t\tInterference (read): " << *MI); + return false; + } + } + + // We're going to remove the copy which defines a physical reserved + // register, so remove its valno, etc. + DEBUG(dbgs() << "\t\tRemoving phys reg def of " << DstReg << " at " + << CopyRegIdx << "\n"); + + LIS->removePhysRegDefAt(DstReg, CopyRegIdx); + // Create a new dead def at the new def location. + for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) { + LiveRange &LR = LIS->getRegUnit(*UI); + LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator()); + } + } + LIS->RemoveMachineInstrFromMaps(CopyMI); CopyMI->eraseFromParent(); @@ -1538,11 +1629,12 @@ class JoinVals { /// (Main) register we work on. const unsigned Reg; - // Reg (and therefore the values in this liverange) will end up as subregister - // SubIdx in the coalesced register. Either CP.DstIdx or CP.SrcIdx. + /// Reg (and therefore the values in this liverange) will end up as + /// subregister SubIdx in the coalesced register. Either CP.DstIdx or + /// CP.SrcIdx. const unsigned SubIdx; - // The LaneMask that this liverange will occupy the coalesced register. May be - // smaller than the lanemask produced by SubIdx when merging subranges. + /// The LaneMask that this liverange will occupy the coalesced register. May + /// be smaller than the lanemask produced by SubIdx when merging subranges. const unsigned LaneMask; /// This is true when joining sub register ranges, false when joining main @@ -1551,7 +1643,7 @@ class JoinVals { /// Whether the current LiveInterval tracks subregister liveness. const bool TrackSubRegLiveness; - // Values that will be present in the final live range. + /// Values that will be present in the final live range. SmallVectorImpl<VNInfo*> &NewVNInfo; const CoalescerPair &CP; @@ -1559,75 +1651,75 @@ class JoinVals { SlotIndexes *Indexes; const TargetRegisterInfo *TRI; - // Value number assignments. Maps value numbers in LI to entries in NewVNInfo. - // This is suitable for passing to LiveInterval::join(). + /// Value number assignments. 
Maps value numbers in LI to entries in + /// NewVNInfo. This is suitable for passing to LiveInterval::join(). SmallVector<int, 8> Assignments; - // Conflict resolution for overlapping values. + /// Conflict resolution for overlapping values. enum ConflictResolution { - // No overlap, simply keep this value. + /// No overlap, simply keep this value. CR_Keep, - // Merge this value into OtherVNI and erase the defining instruction. - // Used for IMPLICIT_DEF, coalescable copies, and copies from external - // values. + /// Merge this value into OtherVNI and erase the defining instruction. + /// Used for IMPLICIT_DEF, coalescable copies, and copies from external + /// values. CR_Erase, - // Merge this value into OtherVNI but keep the defining instruction. - // This is for the special case where OtherVNI is defined by the same - // instruction. + /// Merge this value into OtherVNI but keep the defining instruction. + /// This is for the special case where OtherVNI is defined by the same + /// instruction. CR_Merge, - // Keep this value, and have it replace OtherVNI where possible. This - // complicates value mapping since OtherVNI maps to two different values - // before and after this def. - // Used when clobbering undefined or dead lanes. + /// Keep this value, and have it replace OtherVNI where possible. This + /// complicates value mapping since OtherVNI maps to two different values + /// before and after this def. + /// Used when clobbering undefined or dead lanes. CR_Replace, - // Unresolved conflict. Visit later when all values have been mapped. + /// Unresolved conflict. Visit later when all values have been mapped. CR_Unresolved, - // Unresolvable conflict. Abort the join. + /// Unresolvable conflict. Abort the join. CR_Impossible }; - // Per-value info for LI. The lane bit masks are all relative to the final - // joined register, so they can be compared directly between SrcReg and - // DstReg. + /// Per-value info for LI. The lane bit masks are all relative to the final + /// joined register, so they can be compared directly between SrcReg and + /// DstReg. struct Val { ConflictResolution Resolution; - // Lanes written by this def, 0 for unanalyzed values. + /// Lanes written by this def, 0 for unanalyzed values. unsigned WriteLanes; - // Lanes with defined values in this register. Other lanes are undef and - // safe to clobber. + /// Lanes with defined values in this register. Other lanes are undef and + /// safe to clobber. unsigned ValidLanes; - // Value in LI being redefined by this def. + /// Value in LI being redefined by this def. VNInfo *RedefVNI; - // Value in the other live range that overlaps this def, if any. + /// Value in the other live range that overlaps this def, if any. VNInfo *OtherVNI; - // Is this value an IMPLICIT_DEF that can be erased? - // - // IMPLICIT_DEF values should only exist at the end of a basic block that - // is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be - // safely erased if they are overlapping a live value in the other live - // interval. - // - // Weird control flow graphs and incomplete PHI handling in - // ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with - // longer live ranges. Such IMPLICIT_DEF values should be treated like - // normal values. + /// Is this value an IMPLICIT_DEF that can be erased? + /// + /// IMPLICIT_DEF values should only exist at the end of a basic block that + /// is a predecessor to a phi-value. 
These IMPLICIT_DEF instructions can be + /// safely erased if they are overlapping a live value in the other live + /// interval. + /// + /// Weird control flow graphs and incomplete PHI handling in + /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with + /// longer live ranges. Such IMPLICIT_DEF values should be treated like + /// normal values. bool ErasableImplicitDef; - // True when the live range of this value will be pruned because of an - // overlapping CR_Replace value in the other live range. + /// True when the live range of this value will be pruned because of an + /// overlapping CR_Replace value in the other live range. bool Pruned; - // True once Pruned above has been computed. + /// True once Pruned above has been computed. bool PrunedComputed; Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0), @@ -1637,17 +1729,61 @@ class JoinVals { bool isAnalyzed() const { return WriteLanes != 0; } }; - // One entry per value number in LI. + /// One entry per value number in LI. SmallVector<Val, 8> Vals; + /// Compute the bitmask of lanes actually written by DefMI. + /// Set Redef if there are any partial register definitions that depend on the + /// previous value of the register. unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; + + /// Find the ultimate value that VNI was copied from. std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const; + bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const; + + /// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. + /// Return a conflict resolution when possible, but leave the hard cases as + /// CR_Unresolved. + /// Recursively calls computeAssignment() on this and Other, guaranteeing that + /// both OtherVNI and RedefVNI have been analyzed and mapped before returning. + /// The recursion always goes upwards in the dominator tree, making loops + /// impossible. ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other); + + /// Compute the value assignment for ValNo in RI. + /// This may be called recursively by analyzeValue(), but never for a ValNo on + /// the stack. void computeAssignment(unsigned ValNo, JoinVals &Other); + + /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute + /// the extent of the tainted lanes in the block. + /// + /// Multiple values in Other.LR can be affected since partial redefinitions + /// can preserve previously tainted lanes. + /// + /// 1 %dst = VLOAD <-- Define all lanes in %dst + /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 + /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 + /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read + /// + /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) + /// entry to TaintedVals. + /// + /// Returns false if the tainted lanes extend beyond the basic block. bool taintExtent(unsigned, unsigned, JoinVals&, SmallVectorImpl<std::pair<SlotIndex, unsigned> >&); + + /// Return true if MI uses any of the given Lanes from Reg. + /// This does not include partial redefinitions of Reg. bool usesLanes(const MachineInstr *MI, unsigned, unsigned, unsigned) const; + + /// Determine if ValNo is a copy of a value number in LR or Other.LR that will + /// be pruned: + /// + /// %dst = COPY %src + /// %src = COPY %dst <-- This value to be pruned. + /// %dst = COPY %src <-- This value is a copy of a pruned value. 
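// A minimal generic sketch (not JoinVals' exact code) of the memoization
// behind the query declared below: a PrunedComputed flag makes the recursive
// walk over copy chains terminate and run at most once per value.
#include <vector>
struct PrunedInfoSketch { bool Pruned = false; bool PrunedComputed = false; };
static bool isPrunedSketch(unsigned ValNo, std::vector<PrunedInfoSketch> &Vals) {
  PrunedInfoSketch &V = Vals[ValNo];
  if (V.PrunedComputed)
    return V.Pruned;
  V.PrunedComputed = true;
  // A full implementation would recurse into the defining copy's source
  // value here and propagate its Pruned flag before returning.
  return V.Pruned;
}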
bool isPrunedValue(unsigned ValNo, JoinVals &Other);
 public:
@@ -1675,9 +1811,9 @@ public:
 void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
 bool changeInstrs);
- // Removes subranges starting at copies that get removed. This sometimes
- // happens when undefined subranges are copied around. These ranges contain
- // no usefull information and can be removed.
+ /// Removes subranges starting at copies that get removed. This sometimes
+ /// happens when undefined subranges are copied around. These ranges contain
+ /// no useful information and can be removed.
 void pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask);
 /// Erase any machine instructions that have been coalesced away.
@@ -1687,14 +1823,14 @@ public:
 void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
 SmallVectorImpl<unsigned> &ShrinkRegs);
+ /// Remove live range defs at places where implicit defs will be removed.
+ void removeImplicitDefs();
+
 /// Get the value assignments suitable for passing to LiveInterval::join.
 const int *getAssignments() const { return Assignments.data(); }
 };
} // end anonymous namespace
-/// Compute the bitmask of lanes actually written by DefMI.
-/// Set Redef if there are any partial register definitions that depend on the
-/// previous value of the register.
 unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
 const {
 unsigned L = 0;
@@ -1709,7 +1845,6 @@ unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
 return L;
 }
-/// Find the ultimate value that VNI was copied from.
 std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
 const VNInfo *VNI) const {
 unsigned Reg = this->Reg;
@@ -1770,13 +1905,6 @@ bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
 return Orig0->def == Orig1->def && Reg0 == Reg1;
 }
-/// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
-/// Return a conflict resolution when possible, but leave the hard cases as
-/// CR_Unresolved.
-/// Recursively calls computeAssignment() on this and Other, guaranteeing that
-/// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
-/// The recursion always goes upwards in the dominator tree, making loops
-/// impossible.
 JoinVals::ConflictResolution
 JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
 Val &V = Vals[ValNo];
@@ -1798,7 +1926,11 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
 assert(DefMI != nullptr);
 if (SubRangeJoin) {
 // We don't care about the lanes when joining subregister ranges.
- V.ValidLanes = V.WriteLanes = 1;
+ V.WriteLanes = V.ValidLanes = 1;
+ if (DefMI->isImplicitDef()) {
+ V.ValidLanes = 0;
+ V.ErasableImplicitDef = true;
+ }
 } else {
 bool Redef = false;
 V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
@@ -1995,9 +2127,6 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
 return CR_Unresolved;
 }
-/// Compute the value assignment for ValNo in RI.
-/// This may be called recursively by analyzeValue(), but never for a ValNo on
-/// the stack.
 void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
 Val &V = Vals[ValNo];
 if (V.isAnalyzed()) {
@@ -2051,21 +2180,6 @@ bool JoinVals::mapValues(JoinVals &Other) {
 return true;
 }
-/// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
-/// the extent of the tainted lanes in the block.
-///
-/// Multiple values in Other.LR can be affected since partial redefinitions can
-/// preserve previously tainted lanes.
-/// -/// 1 %dst = VLOAD <-- Define all lanes in %dst -/// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 -/// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 -/// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read -/// -/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) -/// entry to TaintedVals. -/// -/// Returns false if the tainted lanes extend beyond the basic block. bool JoinVals:: taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) { @@ -2106,8 +2220,6 @@ taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, return true; } -/// Return true if MI uses any of the given Lanes from Reg. -/// This does not include partial redefinitions of Reg. bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx, unsigned Lanes) const { if (MI->isDebugValue()) @@ -2189,13 +2301,6 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { return true; } -// Determine if ValNo is a copy of a value number in LR or Other.LR that will -// be pruned: -// -// %dst = COPY %src -// %src = COPY %dst <-- This value to be pruned. -// %dst = COPY %src <-- This value is a copy of a pruned value. -// bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) { Val &V = Vals[ValNo]; if (V.Pruned || V.PrunedComputed) @@ -2308,25 +2413,39 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) LI.removeEmptySubRanges(); } +void JoinVals::removeImplicitDefs() { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + Val &V = Vals[i]; + if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned) + continue; + + VNInfo *VNI = LR.getValNumInfo(i); + VNI->markUnused(); + LR.removeValNo(VNI); + } +} + void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, SmallVectorImpl<unsigned> &ShrinkRegs) { for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { // Get the def location before markUnused() below invalidates it. SlotIndex Def = LR.getValNumInfo(i)->def; switch (Vals[i].Resolution) { - case CR_Keep: + case CR_Keep: { // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any // longer. The IMPLICIT_DEF instructions are only inserted by // PHIElimination to guarantee that all PHI predecessors have a value. if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned) break; - // Remove value number i from LR. Note that this VNInfo is still present - // in NewVNInfo, so it will appear as an unused value number in the final - // joined interval. - LR.getValNumInfo(i)->markUnused(); - LR.removeValNo(LR.getValNumInfo(i)); + // Remove value number i from LR. + VNInfo *VNI = LR.getValNumInfo(i); + LR.removeValNo(VNI); + // Note that this VNInfo is reused and still referenced in NewVNInfo, + // make it appear like an unused value number. + VNI->markUnused(); DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n'); // FALL THROUGH. 
+ } case CR_Erase: { MachineInstr *MI = Indexes->getInstructionFromIndex(Def); @@ -2349,7 +2468,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, } } -void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, +bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, unsigned LaneMask, const CoalescerPair &CP) { SmallVector<VNInfo*, 16> NewVNInfo; @@ -2358,13 +2477,20 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask, NewVNInfo, CP, LIS, TRI, true, true); - /// Compute NewVNInfo and resolve conflicts (see also joinVirtRegs()) - /// Conflicts should already be resolved so the mapping/resolution should - /// always succeed. - if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) - llvm_unreachable("Can't join subrange although main ranges are compatible"); - if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) - llvm_unreachable("Can't join subrange although main ranges are compatible"); + // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs()) + // We should be able to resolve all conflicts here as we could successfully do + // it on the mainrange already. There is however a problem when multiple + // ranges get mapped to the "overflow" lane mask bit which creates unexpected + // interferences. + if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) { + DEBUG(dbgs() << "*** Couldn't join subrange!\n"); + return false; + } + if (!LHSVals.resolveConflicts(RHSVals) || + !RHSVals.resolveConflicts(LHSVals)) { + DEBUG(dbgs() << "*** Couldn't join subrange!\n"); + return false; + } // The merging algorithm in LiveInterval::join() can't handle conflicting // value mappings, so we need to remove any live ranges that overlap a @@ -2374,6 +2500,9 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, LHSVals.pruneValues(RHSVals, EndPoints, false); RHSVals.pruneValues(LHSVals, EndPoints, false); + LHSVals.removeImplicitDefs(); + RHSVals.removeImplicitDefs(); + LRange.verify(); RRange.verify(); @@ -2383,16 +2512,17 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n"); if (EndPoints.empty()) - return; + return true; // Recompute the parts of the live range we had to remove because of // CR_Replace conflicts. 
DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: " << LRange << '\n'); LIS->extendToIndices(LRange, EndPoints); + return true; } -void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, +bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, unsigned LaneMask, CoalescerPair &CP) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); @@ -2420,7 +2550,8 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, CommonRange = &R; } LiveRange RangeCopy(ToMerge, Allocator); - joinSubRegRanges(*CommonRange, RangeCopy, Common, CP); + if (!joinSubRegRanges(*CommonRange, RangeCopy, Common, CP)) + return false; LaneMask &= ~RMask; } @@ -2428,13 +2559,14 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask)); LI.createSubRangeFrom(Allocator, LaneMask, ToMerge); } + return true; } bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { SmallVector<VNInfo*, 16> NewVNInfo; LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); LiveInterval &LHS = LIS->getInterval(CP.getDstReg()); - bool TrackSubRegLiveness = MRI->tracksSubRegLiveness(); + bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC()); JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness); JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS, @@ -2478,22 +2610,40 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // Determine lanemasks of RHS in the coalesced register and merge subranges. unsigned SrcIdx = CP.getSrcIdx(); + bool Abort = false; if (!RHS.hasSubRanges()) { unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() : TRI->getSubRegIndexLaneMask(SrcIdx); - mergeSubRangeInto(LHS, RHS, Mask, CP); + if (!mergeSubRangeInto(LHS, RHS, Mask, CP)) + Abort = true; } else { // Pair up subranges and merge. for (LiveInterval::SubRange &R : RHS.subranges()) { unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); - mergeSubRangeInto(LHS, R, Mask, CP); + if (!mergeSubRangeInto(LHS, R, Mask, CP)) { + Abort = true; + break; + } } } + if (Abort) { + // This shouldn't have happened :-( + // However we are aware of at least one existing problem where we + // can't merge subranges when multiple ranges end up in the + // "overflow bit" 32. As a workaround we drop all subregister ranges + // which means we loose some precision but are back to a well defined + // state. + assert((CP.getNewRC()->getLaneMask() & 0x80000000u) + && "SubRange merge should only fail when merging into bit 32."); + DEBUG(dbgs() << "\tSubrange join aborted!\n"); + LHS.clearSubRanges(); + RHS.clearSubRanges(); + } else { + DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); - DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); - - LHSVals.pruneSubRegValues(LHS, ShrinkMask); - RHSVals.pruneSubRegValues(LHS, ShrinkMask); + LHSVals.pruneSubRegValues(LHS, ShrinkMask); + RHSVals.pruneSubRegValues(LHS, ShrinkMask); + } } // The merging algorithm in LiveInterval::join() can't handle conflicting @@ -2510,7 +2660,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); while (!ShrinkRegs.empty()) - LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); + shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); // Join RHS into LHS. 
LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo); @@ -2532,13 +2682,12 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { return true; } -/// Attempt to join these two intervals. On failure, this returns false. bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); } namespace { -// Information concerning MBB coalescing priority. +/// Information concerning MBB coalescing priority. struct MBBPriorityInfo { MachineBasicBlock *MBB; unsigned Depth; @@ -2549,10 +2698,10 @@ struct MBBPriorityInfo { }; } -// C-style comparator that sorts first based on the loop depth of the basic -// block (the unsigned), and then on the MBB number. -// -// EnableGlobalCopies assumes that the primary sort key is loop depth. +/// C-style comparator that sorts first based on the loop depth of the basic +/// block (the unsigned), and then on the MBB number. +/// +/// EnableGlobalCopies assumes that the primary sort key is loop depth. static int compareMBBPriority(const MBBPriorityInfo *LHS, const MBBPriorityInfo *RHS) { // Deeper loops first @@ -2592,8 +2741,6 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg)); } -// Try joining WorkList copies starting from index From. -// Null out any successful joins. bool RegisterCoalescer:: copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { bool Progress = false; @@ -2615,6 +2762,64 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { return Progress; } +/// Check if DstReg is a terminal node. +/// I.e., it does not have any affinity other than \p Copy. +static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy, + const MachineRegisterInfo *MRI) { + assert(Copy.isCopyLike()); + // Check if the destination of this copy as any other affinity. + for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg)) + if (&MI != &Copy && MI.isCopyLike()) + return false; + return true; +} + +bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { + assert(Copy.isCopyLike()); + if (!UseTerminalRule) + return false; + unsigned DstReg, DstSubReg, SrcReg, SrcSubReg; + isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg); + // Check if the destination of this copy has any other affinity. + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || + // If SrcReg is a physical register, the copy won't be coalesced. + // Ignoring it may have other side effect (like missing + // rematerialization). So keep it. + TargetRegisterInfo::isPhysicalRegister(SrcReg) || + !isTerminalReg(DstReg, Copy, MRI)) + return false; + + // DstReg is a terminal node. Check if it inteferes with any other + // copy involving SrcReg. + const MachineBasicBlock *OrigBB = Copy.getParent(); + const LiveInterval &DstLI = LIS->getInterval(DstReg); + for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) { + // Technically we should check if the weight of the new copy is + // interesting compared to the other one and update the weight + // of the copies accordingly. However, this would only work if + // we would gather all the copies first then coalesce, whereas + // right now we interleave both actions. + // For now, just consider the copies that are in the same block. 
+ if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB) + continue; + unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg; + isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg, + OtherSubReg); + if (OtherReg == SrcReg) + OtherReg = OtherSrcReg; + // Check if OtherReg is a non-terminal. + if (TargetRegisterInfo::isPhysicalRegister(OtherReg) || + isTerminalReg(OtherReg, MI, MRI)) + continue; + // Check that OtherReg interfere with DstReg. + if (LIS->getInterval(OtherReg).overlaps(DstLI)) { + DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg) << '\n'); + return true; + } + } + return false; +} + void RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { DEBUG(dbgs() << MBB->getName() << ":\n"); @@ -2623,6 +2828,8 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { // yet, it might invalidate the iterator. const unsigned PrevSize = WorkList.size(); if (JoinGlobalCopies) { + SmallVector<MachineInstr*, 2> LocalTerminals; + SmallVector<MachineInstr*, 2> GlobalTerminals; // Coalesce copies bottom-up to coalesce local defs before local uses. They // are not inherently easier to resolve, but slightly preferable until we // have local live range splitting. In particular this is required by @@ -2631,17 +2838,35 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { MII != E; ++MII) { if (!MII->isCopyLike()) continue; - if (isLocalCopy(&(*MII), LIS)) - LocalWorkList.push_back(&(*MII)); - else - WorkList.push_back(&(*MII)); + bool ApplyTerminalRule = applyTerminalRule(*MII); + if (isLocalCopy(&(*MII), LIS)) { + if (ApplyTerminalRule) + LocalTerminals.push_back(&(*MII)); + else + LocalWorkList.push_back(&(*MII)); + } else { + if (ApplyTerminalRule) + GlobalTerminals.push_back(&(*MII)); + else + WorkList.push_back(&(*MII)); + } } + // Append the copies evicted by the terminal rule at the end of the list. + LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end()); + WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end()); } else { + SmallVector<MachineInstr*, 2> Terminals; for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) - if (MII->isCopyLike()) - WorkList.push_back(MII); + if (MII->isCopyLike()) { + if (applyTerminalRule(*MII)) + Terminals.push_back(&(*MII)); + else + WorkList.push_back(MII); + } + // Append the copies evicted by the terminal rule at the end of the list. + WorkList.append(Terminals.begin(), Terminals.end()); } // Try coalescing the collected copies immediately, and remove the nulls. 
// This prevents the WorkList from getting too large since most copies are @@ -2704,15 +2929,14 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { MF = &fn; MRI = &fn.getRegInfo(); TM = &fn.getTarget(); - TRI = TM->getSubtargetImpl()->getRegisterInfo(); - TII = TM->getSubtargetImpl()->getInstrInfo(); + const TargetSubtargetInfo &STI = fn.getSubtarget(); + TRI = STI.getRegisterInfo(); + TII = STI.getInstrInfo(); LIS = &getAnalysis<LiveIntervals>(); AA = &getAnalysis<AliasAnalysis>(); Loops = &getAnalysis<MachineLoopInfo>(); - - const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>(); if (EnableGlobalCopies == cl::BOU_UNSET) - JoinGlobalCopies = ST.useMachineScheduler(); + JoinGlobalCopies = STI.enableJoinGlobalCopies(); else JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); @@ -2744,7 +2968,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { unsigned Reg = InflateRegs[i]; if (MRI->reg_nodbg_empty(Reg)) continue; - if (MRI->recomputeRegClass(Reg, *TM)) { + if (MRI->recomputeRegClass(Reg)) { DEBUG(dbgs() << PrintReg(Reg) << " inflated to " << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n'); LiveInterval &LI = LIS->getInterval(Reg); @@ -2754,9 +2978,9 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // remove the subranges. LI.clearSubRanges(); } else { +#ifndef NDEBUG // If subranges are still supported, then the same subregs should still // be supported. -#ifndef NDEBUG for (LiveInterval::SubRange &S : LI.subranges()) { assert ((S.LaneMask & ~MaxMask) == 0); } @@ -2772,7 +2996,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { return true; } -/// Implement the dump method. void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { LIS->print(O, m); } diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp index 9925efb..667783e 100644 --- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -304,6 +304,7 @@ static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) { return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end(); } +namespace { /// Collect this instruction's unique uses and defs into SmallVectors for /// processing defs and uses in order. /// @@ -354,6 +355,7 @@ protected: } } }; +} // namespace /// Collect physical and virtual register operands. static void collectOperands(const MachineInstr *MI, @@ -748,9 +750,11 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { /// /// This assumes that the current LiveOut set is sufficient. /// -/// FIXME: This is expensive for an on-the-fly query. We need to cache the -/// result per-SUnit with enough information to adjust for the current -/// scheduling position. But this works as a proof of concept. +/// This is expensive for an on-the-fly query because it calls +/// bumpUpwardPressure to recompute the pressure sets based on current +/// liveness. This mainly exists to verify correctness, e.g. with +/// -verify-misched. getUpwardPressureDelta is the fast version of this query +/// that uses the per-SUnit cache of the PressureDiff. 
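A usage sketch contrasting the two upward-pressure queries described above. The signatures are abridged and the equality check is illustrative; treat both as assumptions rather than the in-tree verification:

// Sketch: the fast query reads the cached per-SUnit PressureDiff; the Max
// variant recomputes from live sets and is intended for verification,
// e.g. under -verify-misched.
RegPressureDelta FastDelta, SlowDelta;
RPTracker.getUpwardPressureDelta(MI, *PDiff, FastDelta, CriticalPSets,
                                 MaxPressureLimit);    // cached, fast
#ifndef NDEBUG
RPTracker.getMaxUpwardPressureDelta(MI, PDiff, SlowDelta, CriticalPSets,
                                    MaxPressureLimit); // recomputed, slow
assert(FastDelta == SlowDelta && "cached delta diverged from recomputation");
#endif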
void RegPressureTracker:: getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, @@ -807,10 +811,8 @@ getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, #endif } -/// This is a prototype of the fast version of querying register pressure that -/// does not directly depend on current liveness. It's still slow because we -/// recompute pressure change on-the-fly. This implementation only exists to -/// prove correctness. +/// This is the fast version of querying register pressure that does not +/// directly depend on current liveness. /// /// @param Delta captures information needed for heuristics. /// @@ -948,6 +950,11 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { /// register units of that pressure set introduced by this instruction. /// /// This assumes that the current LiveIn set is sufficient. +/// +/// This is expensive for an on-the-fly query because it calls +/// bumpDownwardPressure to recompute the pressure sets based on current +/// liveness. We don't yet have a fast version of downward pressure tracking +/// analagous to getUpwardPressureDelta. void RegPressureTracker:: getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef<PressureChange> CriticalPSets, diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp index 6f8b337..76a7fef 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -36,8 +36,8 @@ static cl::opt<bool> StressSchedOpt( void SchedulingPriorityQueue::anchor() { } ScheduleDAG::ScheduleDAG(MachineFunction &mf) - : TM(mf.getTarget()), TII(TM.getSubtargetImpl()->getInstrInfo()), - TRI(TM.getSubtargetImpl()->getRegisterInfo()), MF(mf), + : TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()), + TRI(mf.getSubtarget().getRegisterInfo()), MF(mf), MRI(mf.getRegInfo()), EntrySU(), ExitSU() { #ifndef NDEBUG StressSched = StressSchedOpt; diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 409e704..c60c518 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -27,7 +28,6 @@ #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDFS.h" #include "llvm/IR/Operator.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" @@ -51,18 +51,17 @@ static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, - bool IsPostRAFlag, - bool RemoveKillFlags, + bool IsPostRAFlag, bool RemoveKillFlags, LiveIntervals *lis) - : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis), - IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags), - CanHandleTerminators(false), FirstDbgValue(nullptr) { + : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis), + IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags), + CanHandleTerminators(false), FirstDbgValue(nullptr) { assert((IsPostRA || LIS) && "PreRA scheduling requires 
LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && "Virtual registers must be removed prior to PostRA scheduling"); - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &ST = mf.getSubtarget(); SchedModel.init(ST.getSchedModel(), &ST, TII); } @@ -97,14 +96,15 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { /// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. static void getUnderlyingObjects(const Value *V, - SmallVectorImpl<Value *> &Objects) { + SmallVectorImpl<Value *> &Objects, + const DataLayout &DL) { SmallPtrSet<const Value *, 16> Visited; SmallVector<const Value *, 4> Working(1, V); do { V = Working.pop_back_val(); SmallVector<Value *, 4> Objs; - GetUnderlyingObjects(const_cast<Value *>(V), Objs); + GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { @@ -133,7 +133,8 @@ UnderlyingObjectsVector; /// object, return the Value for that object. static void getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo *MFI, - UnderlyingObjectsVector &Objects) { + UnderlyingObjectsVector &Objects, + const DataLayout &DL) { if (!MI->hasOneMemOperand() || (!(*MI->memoperands_begin())->getValue() && !(*MI->memoperands_begin())->getPseudoValue()) || @@ -142,6 +143,13 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, if (const PseudoSourceValue *PSV = (*MI->memoperands_begin())->getPseudoValue()) { + // Function that contain tail calls don't have unique PseudoSourceValue + // objects. Two PseudoSourceValues might refer to the same or overlapping + // locations. The client code calling this function assumes this is not the + // case. So return a conservative answer of no known object. + if (MFI->hasTailCall()) + return; + // For now, ignore PseudoSourceValues which may alias LLVM IR values // because the code that uses this function has no way to cope with // such aliases. @@ -157,12 +165,9 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, return; SmallVector<Value *, 4> Objs; - getUnderlyingObjects(V, Objs); - - for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); - I != IE; ++I) { - V = *I; + getUnderlyingObjects(V, Objs, DL); + for (Value *V : Objs) { if (!isIdentifiedObject(V)) { Objects.clear(); return; @@ -253,7 +258,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { assert(MO.isDef() && "expect physreg def"); // Ask the target if address-backscheduling is desirable, and if so how much. - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &ST = MF.getSubtarget(); for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); Alias.isValid(); ++Alias) { @@ -444,7 +449,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { int DefOp = Def->findRegisterDefOperandIdx(Reg); dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &ST = MF.getSubtarget(); ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); SU->addPred(dep); } @@ -469,7 +474,8 @@ static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { // This MI might have either incomplete info, or known to be unsafe // to deal with (i.e. volatile object). 
static inline bool isUnsafeMemoryObject(MachineInstr *MI, - const MachineFrameInfo *MFI) { + const MachineFrameInfo *MFI, + const DataLayout &DL) { if (!MI || MI->memoperands_empty()) return true; // We purposefully do no check for hasOneMemOperand() here @@ -492,11 +498,10 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, return true; SmallVector<Value *, 4> Objs; - getUnderlyingObjects(V, Objs); - for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), - IE = Objs.end(); I != IE; ++I) { + getUnderlyingObjects(V, Objs, DL); + for (Value *V : Objs) { // Does this pointer refer to a distinct and identifiable object? - if (!isIdentifiedObject(*I)) + if (!isIdentifiedObject(V)) return true; } @@ -509,7 +514,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, /// these two MIs be reordered during scheduling from memory dependency /// point of view. static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, - MachineInstr *MIa, + const DataLayout &DL, MachineInstr *MIa, MachineInstr *MIb) { const MachineFunction *MF = MIa->getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -528,7 +533,7 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) return true; - if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI)) + if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL)) return true; // If we are dealing with two "normal" loads, we do not need an edge @@ -580,10 +585,10 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, /// This recursive function iterates over chain deps of SUb looking for /// "latest" node that needs a chain edge to SUa. -static unsigned -iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, - SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth, - SmallPtrSetImpl<const SUnit*> &Visited) { +static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, + const DataLayout &DL, SUnit *SUa, SUnit *SUb, + SUnit *ExitSU, unsigned *Depth, + SmallPtrSetImpl<const SUnit *> &Visited) { if (!SUa || !SUb || SUb == ExitSU) return *Depth; @@ -608,7 +613,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, // add that edge to the predecessors chain of SUb, // and stop descending. if (*Depth > 200 || - MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { SUb->addPred(SDep(SUa, SDep::MayAliasMem)); return *Depth; } @@ -618,7 +623,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end(); I != E; ++I) if (I->isNormalMemoryOrBarrier()) - iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited); + iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited); return *Depth; } @@ -627,7 +632,8 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, /// checks whether SU can be aliasing any node dominated /// by it. 
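For orientation, a simplified restatement of what isUnsafeMemoryObject treats as unsafe. The full predicate (partly elided by the hunk context) presumably also rejects volatile accesses and unmodeled side effects before walking the underlying objects; this sketch keeps only the leading conservative cases:

// Simplified sketch of the conservative cases that force chain edges.
static bool looksUnsafeSketch(const MachineInstr *MI) {
  if (!MI || MI->memoperands_empty())
    return true;                    // no memory info: assume the worst
  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (MMO->isVolatile())
    return true;                    // volatile: preserve program order
  return !MMO->getValue() && !MMO->getPseudoValue();
}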
static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, - SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList, + const DataLayout &DL, SUnit *SU, SUnit *ExitSU, + std::set<SUnit *> &CheckList, unsigned LatencyToLoad) { if (!SU) return; @@ -639,7 +645,7 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, I != IE; ++I) { if (SU == *I) continue; - if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) { + if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) { SDep Dep(SU, SDep::MayAliasMem); Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0); (*I)->addPred(Dep); @@ -650,22 +656,22 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, for (SUnit::const_succ_iterator J = (*I)->Succs.begin(), JE = (*I)->Succs.end(); J != JE; ++J) if (J->isNormalMemoryOrBarrier()) - iterateChainSucc (AA, MFI, SU, J->getSUnit(), - ExitSU, &Depth, Visited); + iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth, + Visited); } } /// Check whether two objects need a chain edge, if so, add it /// otherwise remember the rejected SU. -static inline -void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI, - SUnit *SUa, SUnit *SUb, - std::set<SUnit *> &RejectList, - unsigned TrueMemOrderLatency = 0, - bool isNormalMemory = false) { +static inline void addChainDependency(AliasAnalysis *AA, + const MachineFrameInfo *MFI, + const DataLayout &DL, SUnit *SUa, + SUnit *SUb, std::set<SUnit *> &RejectList, + unsigned TrueMemOrderLatency = 0, + bool isNormalMemory = false) { // If this is a false dependency, // do not add the edge, but rememeber the rejected node. - if (MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); Dep.setLatency(TrueMemOrderLatency); SUb->addPred(Dep); @@ -743,7 +749,7 @@ void ScheduleDAGInstrs::initSUnits() { void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker, PressureDiffs *PDiffs) { - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &ST = MF.getSubtarget(); bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI : ST.useAA(); AliasAnalysis *AAForDep = UseAA ? AA : nullptr; @@ -884,7 +890,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, BarrierChain = SU; // This is a barrier event that acts as a pivotal node in the DAG, // so it is safe to clear list of exposed nodes. 
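addChainDependency's else branch is cut off by the hunk above; per its comment, independent pairs are remembered rather than wired up. A hedged sketch of the overall shape, where the RejectList insertion is an assumption based on that comment:

// Shape sketch: a proven dependence becomes a chain edge whose kind and
// latency depend on isNormalMemory; an independent pair is remembered so
// adjustChainDeps can revisit it later.
if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
  SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
  Dep.setLatency(TrueMemOrderLatency);
  SUb->addPred(Dep);
} else {
  RejectList.insert(SUb); // assumed bookkeeping for the rejected node
}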
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); RejectMemNodes.clear(); NonAliasMemDefs.clear(); @@ -897,25 +903,27 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, unsigned ChainLatency = 0; if (AliasChain->getInstr()->mayLoad()) ChainLatency = TrueMemOrderLatency; - addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes, - ChainLatency); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + RejectMemNodes, ChainLatency); } AliasChain = SU; for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes); } for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, - TrueMemOrderLatency); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes, TrueMemOrderLatency); } - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); PendingLoads.clear(); AliasMemDefs.clear(); @@ -929,7 +937,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, BarrierChain->addPred(SDep(SU, SDep::Barrier)); UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs); + getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout()); if (Objs.empty()) { // Treat all other stores conservatively. @@ -953,8 +961,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, - 0, true); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes, 0, true); // If we're not using AA, then we only need one store per object. if (!AAForDep) @@ -978,7 +986,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes, + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + J->second[i], RejectMemNodes, TrueMemOrderLatency, true); J->second.clear(); } @@ -987,23 +996,23 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Add dependencies from all the PendingLoads, i.e. loads // with no underlying object. for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); // Add dependence on alias chain, if needed. 
if (AliasChain) - addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); - // But we also should check dependent instructions for the - // SU in question. - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, - TrueMemOrderLatency); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + RejectMemNodes); } + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); } else if (MI->mayLoad()) { bool MayAlias = true; if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else { UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs); + getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout()); if (Objs.empty()) { // A load with no underlying object. Depend on all @@ -1011,8 +1020,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], - RejectMemNodes); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes); PendingLoads.push_back(SU); MayAlias = true; @@ -1035,18 +1044,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], - RejectMemNodes, 0, true); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes, 0, true); if (ThisMayAlias) AliasMemUses[V].push_back(SU); else NonAliasMemUses[V].push_back(SU); } if (MayAlias) - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0); + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, + RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) - addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + RejectMemNodes); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); } @@ -1080,22 +1091,65 @@ void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { } } +/// \brief If we change a kill flag on the bundle instruction implicit register +/// operands, then we also need to propagate that to any instructions inside +/// the bundle which had the same kill state. +static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, + bool NewKillState) { + if (MI->getOpcode() != TargetOpcode::BUNDLE) + return; + + // Walk backwards from the last instruction in the bundle to the first. + // Once we set a kill flag on an instruction, we bail out, as otherwise we + // might set it on too many operands. We will clear as many flags as we + // can though. + MachineBasicBlock::instr_iterator Begin = MI; + MachineBasicBlock::instr_iterator End = getBundleEnd(MI); + while (Begin != End) { + for (MIOperands MO(--End); MO.isValid(); ++MO) { + if (!MO->isReg() || MO->isDef() || Reg != MO->getReg()) + continue; + + // DEBUG_VALUE nodes do not contribute to code generation and should + // always be ignored. Failure to do so may result in trying to modify + // KILL flags on DEBUG_VALUE nodes, which is distressing. + if (MO->isDebug()) + continue; + + // If the register has the internal flag then it could be killing an + // internal def of the register. 
In this case, just skip. We only want + // to toggle the flag on operands visible outside the bundle. + if (MO->isInternalRead()) + continue; + + if (MO->isKill() == NewKillState) + continue; + MO->setIsKill(NewKillState); + if (NewKillState) + return; + } + } +} + bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) { // Setting kill flag... if (!MO.isKill()) { MO.setIsKill(true); + toggleBundleKillFlag(MI, MO.getReg(), true); return false; } // If MO itself is live, clear the kill flag... if (LiveRegs.test(MO.getReg())) { MO.setIsKill(false); + toggleBundleKillFlag(MI, MO.getReg(), false); return false; } // If any subreg of MO is live, then create an imp-def for that // subreg and keep MO marked as killed. MO.setIsKill(false); + toggleBundleKillFlag(MI, MO.getReg(), false); bool AllDead = true; const unsigned SuperReg = MO.getReg(); MachineInstrBuilder MIB(MF, MI); @@ -1106,8 +1160,10 @@ bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) { } } - if(AllDead) + if(AllDead) { MO.setIsKill(true); + toggleBundleKillFlag(MI, MO.getReg(), true); + } return false; } @@ -1180,6 +1236,12 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { // Warning: toggleKillFlag may invalidate MO. toggleKillFlag(MI, MO); DEBUG(MI->dump()); + DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) { + MachineBasicBlock::instr_iterator Begin = MI; + MachineBasicBlock::instr_iterator End = getBundleEnd(MI); + while (++Begin != End) + DEBUG(Begin->dump()); + }); } killedRegs.set(Reg); @@ -1214,7 +1276,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { else if (SU == &ExitSU) oss << "<exit>"; else - SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true); + SU->getInstr()->print(oss, /*SkipOpers=*/true); return oss.str(); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1df61e4..2c2dc85 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -246,10 +246,11 @@ namespace { SDValue visitSDIVREM(SDNode *N); SDValue visitUDIVREM(SDNode *N); SDValue visitAND(SDNode *N); + SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitOR(SDNode *N); + SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitXOR(SDNode *N); SDValue SimplifyVBinOp(SDNode *N); - SDValue SimplifyVUnaryOp(SDNode *N); SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); @@ -267,6 +268,7 @@ namespace { SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); + SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitTRUNCATE(SDNode *N); SDValue visitBITCAST(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); @@ -302,9 +304,16 @@ namespace { SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitSCALAR_TO_VECTOR(SDNode *N); SDValue visitINSERT_SUBVECTOR(SDNode *N); SDValue visitMLOAD(SDNode *N); SDValue visitMSTORE(SDNode *N); + SDValue visitMGATHER(SDNode *N); + SDValue visitMSCATTER(SDNode *N); + SDValue visitFP_TO_FP16(SDNode *N); + + SDValue visitFADDForFMACombine(SDNode *N); + SDValue visitFSUBForFMACombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); @@ -327,6 +336,7 @@ namespace { SDValue 
SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); + SDValue CombineExtLoad(SDNode *N); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); @@ -363,6 +373,28 @@ namespace { /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); + /// Holds a pointer to an LSBaseSDNode as well as information on where it + /// is located in a sequence of memory operations connected by a chain. + struct MemOpLink { + MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): + MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } + // Ptr to the mem node. + LSBaseSDNode *MemNode; + // Offset from the base ptr. + int64_t OffsetFromBase; + // What is the sequence number of this mem node. + // Lowest mem operand in the DAG starts at zero. + unsigned SequenceNum; + }; + + /// This is a helper function for MergeConsecutiveStores. When the source + /// elements of the consecutive stores are all constants or all extracted + /// vector elements, try to merge them into one larger store. + /// \return True if a merged store was created. + bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, + EVT MemVT, unsigned NumElem, + bool IsConstantSrc, bool UseVector); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -380,12 +412,9 @@ namespace { DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { - AttributeSet FnAttrs = - DAG.getMachineFunction().getFunction()->getAttributes(); - ForCodeSize = - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize) || - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + auto *F = DAG.getMachineFunction().getFunction(); + ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) || + F->hasFnAttribute(Attribute::MinSize); } /// Runs the dag combiner on all nodes in the work list @@ -446,7 +475,7 @@ void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { } SDValue TargetLowering::DAGCombinerInfo:: -CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { +CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); } @@ -566,7 +595,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, case ISD::ConstantFP: { APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); V.changeSign(); - return DAG.getConstantFP(V, Op.getValueType()); + return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); } case ISD::FADD: // FIXME: determine better conditions for this xform. @@ -683,13 +712,23 @@ static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { EltVT.getSizeInBits() >= SplatBitSize); } -// \brief Returns the SDNode if it is a constant BuildVector or constant. -static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { +// \brief Returns the SDNode if it is a constant integer BuildVector +// or constant integer. 
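The new MemOpLink record above declares only data; a hedged sketch of the intended use in MergeConsecutiveStores-style code follows. CandidateStores and getOffsetFromBase are hypothetical placeholders, not APIs from this patch:

// Hypothetical usage: record each candidate store with its byte offset
// from a shared base and its DAG order, then sort by offset so consecutive
// stores become adjacent and mergeable.
SmallVector<MemOpLink, 8> StoreNodes;
unsigned Seq = 0;
for (LSBaseSDNode *St : CandidateStores)           // hypothetical input
  StoreNodes.push_back(MemOpLink(St, getOffsetFromBase(St), Seq++));
std::sort(StoreNodes.begin(), StoreNodes.end(),
          [](const MemOpLink &LHS, const MemOpLink &RHS) {
            return LHS.OffsetFromBase < RHS.OffsetFromBase;
          });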
+static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { if (isa<ConstantSDNode>(N)) return N.getNode(); - BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); - if (BV && BV->isConstant()) - return BV; + if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) + return N.getNode(); + return nullptr; +} + +// \brief Returns the SDNode if it is a constant float BuildVector +// or constant float. +static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { + if (isa<ConstantFPSDNode>(N)) + return N.getNode(); + if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) + return N.getNode(); return nullptr; } @@ -735,10 +774,10 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc) { - if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) { - if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) { + if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R)) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); return SDValue(); } @@ -755,10 +794,10 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, } if (N1.getOpcode() == Opc) { - if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) { - if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) { + if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { + if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L)) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); return SDValue(); } @@ -1309,6 +1348,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); + case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); @@ -1344,9 +1384,13 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); + case ISD::MGATHER: return visitMGATHER(N); case ISD::MLOAD: return visitMLOAD(N); + case ISD::MSCATTER: return visitMSCATTER(N); case ISD::MSTORE: return visitMSTORE(N); + case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); } return SDValue(); } @@ -1412,9 +1456,10 @@ SDValue DAGCombiner::combine(SDNode *N) { SDNode *CSENode; if (const BinaryWithFlagsSDNode *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) { - CSENode = DAG.getNodeIfExists( - N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(), - BinNode->hasNoSignedWrap(), BinNode->isExact()); + CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, + BinNode->Flags.hasNoUnsignedWrap(), + BinNode->Flags.hasNoSignedWrap(), + BinNode->Flags.hasExact()); } else { CSENode = 
DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); } @@ -1471,7 +1516,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { switch (Op.getOpcode()) { case ISD::EntryToken: // Entry tokens don't need to be added to the list. They are - // rededundant. + // redundant. Changed = true; break; @@ -1500,7 +1545,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { SDValue Result; - // If we've change things around then replace token factor. + // If we've changed things around then replace token factor. if (Changed) { if (Ops.empty()) { // The entry token is the only possible outcome. @@ -1510,8 +1555,11 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); } - // Don't add users to work list. - return CombineTo(N, Result, false); + // Add users to worklist if AA is enabled, since it may introduce + // a lot of new chained token factors while removing memory deps. + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + return CombineTo(N, Result, UseAA /*add to worklist*/); } return Result; @@ -1534,17 +1582,37 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } +static bool isNullConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isNullValue(); +} + +static bool isAllOnesConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isAllOnesValue(); +} + +static bool isOneConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isOne(); +} + +/// If \p N is a ContantSDNode with isOpaque() == false return it casted to a +/// ContantSDNode pointer else nullptr. +static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N); + return Const != nullptr && !Const->isOpaque() ? 
Const : nullptr; +} + SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (add x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N1.getNode())) @@ -1559,13 +1627,16 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (add c1, c2) -> c1+c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); // fold (add x, 0) -> x - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N0; // fold (add Sym, c) -> Sym+c if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) @@ -1576,22 +1647,21 @@ SDValue DAGCombiner::visitADD(SDNode *N) { (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A if (N1C && N0.getOpcode() == ISD::SUB) - if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, + if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(N1C->getAPIntValue()+ - N0C->getAPIntValue(), VT), + N0C->getAPIntValue(), DL, VT), N0.getOperand(1)); + } // reassociate add - SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1); - if (RADD.getNode()) + if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) return RADD; // fold ((0-A) + B) -> B-A - if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && - cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) + if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0))) return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); // fold (A + (0-B)) -> A-B - if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && - cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) + if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0))) return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); // fold (A+(B-A)) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) @@ -1651,34 +1721,27 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) - if (N1.getOpcode() == ISD::SHL && - N1.getOperand(0).getOpcode() == ISD::SUB) - if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0))) - if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, - DAG.getNode(ISD::SHL, SDLoc(N), VT, - N1.getOperand(0).getOperand(1), - N1.getOperand(1))); - if (N0.getOpcode() == ISD::SHL && - N0.getOperand(0).getOpcode() == ISD::SUB) - if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0))) - if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, - DAG.getNode(ISD::SHL, SDLoc(N), VT, - N0.getOperand(0).getOperand(1), - N0.getOperand(1))); + if (N1.getOpcode() == ISD::SHL && 
N1.getOperand(0).getOpcode() == ISD::SUB && + isNullConstant(N1.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, + DAG.getNode(ISD::SHL, SDLoc(N), VT, + N1.getOperand(0).getOperand(1), + N1.getOperand(1))); + if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB && + isNullConstant(N0.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, + DAG.getNode(ISD::SHL, SDLoc(N), VT, + N0.getOperand(0).getOperand(1), + N0.getOperand(1))); if (N1.getOpcode() == ISD::AND) { SDValue AndOp0 = N1.getOperand(0); - ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1)); unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); unsigned DestBits = VT.getScalarType().getSizeInBits(); // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) // and similar xforms where the inner op is either ~0 or 0. - if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { + if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) { SDLoc DL(N); return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); } @@ -1699,7 +1762,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (TN->getVT() == MVT::i1) { SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), - DAG.getConstant(1, VT)); + DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); } } @@ -1710,8 +1773,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue DAGCombiner::visitADDC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. @@ -1721,11 +1782,13 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { SDLoc(N), MVT::Glue)); // canonicalize constant to RHS. + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); // fold (addc x, 0) -> x + no carry out - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); @@ -1752,10 +1815,10 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CarryIn = N->getOperand(2); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); // canonicalize constant to RHS + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), N1, N0, CarryIn); @@ -1773,25 +1836,21 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations, bool LegalTypes) { if (!VT.isVector()) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); return SDValue(); } SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 
nullptr : - dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (sub x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N1.getNode())) @@ -1803,14 +1862,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // fold (sub c1, c2) -> c1-c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) - if (N1C) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, - DAG.getConstant(-N1C->getAPIntValue(), VT)); + if (N1C) { + SDLoc DL(N); + return DAG.getNode(ISD::ADD, DL, VT, N0, + DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); + } // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) - if (N0C && N0C->isAllOnesValue()) + if (isAllOnesConstant(N0)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold A-(A-B) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) @@ -1822,10 +1885,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) return N0.getOperand(0); // fold C2-(A+C1) -> (C2-C1)-A + ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : + dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { + SDLoc DL(N); SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), - VT); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC, + DL, VT); + return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); } // fold ((A+(B+or-C))-B) -> A+or-C @@ -1866,7 +1932,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) if (GA->getGlobal() == GB->getGlobal()) return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), - VT); + SDLoc(N), VT); } // sub X, (sextinreg Y i1) -> add X, (and Y 1) @@ -1875,7 +1941,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (TN->getVT() == MVT::i1) { SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), - DAG.getConstant(1, VT)); + DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); } } @@ -1886,8 +1952,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue DAGCombiner::visitSUBC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an SUB. @@ -1897,18 +1961,20 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { MVT::Glue)); // fold (subc x, x) -> 0 + no borrow - if (N0 == N1) - return CombineTo(N, DAG.getConstant(0, VT), - DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), + if (N0 == N1) { + SDLoc DL(N); + return CombineTo(N, DAG.getConstant(0, DL, VT), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); + } // fold (subc x, 0) -> x + no borrow - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) + no borrow - if (N0C && N0C->isAllOnesValue()) + if (isAllOnesConstant(N0)) return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0), DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); @@ -1935,33 +2001,41 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); bool N0IsConst = false; bool N1IsConst = false; + bool N1IsOpaqueConst = false; + bool N0IsOpaqueConst = false; APInt ConstValue0, ConstValue1; // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); } else { - N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr; - ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() - : APInt(); - N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr; - ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() - : APInt(); + N0IsConst = isa<ConstantSDNode>(N0); + if (N0IsConst) { + ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue(); + N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque(); + } + N1IsConst = isa<ConstantSDNode>(N1); + if (N1IsConst) { + ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); + N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque(); + } } // fold (mul c1, c2) -> c1*c2 - if (N0IsConst && N1IsConst) - return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode()); + if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst) + return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, + N0.getNode(), N1.getNode()); - // canonicalize constant to RHS - if (N0IsConst && !N1IsConst) + // canonicalize constant to RHS (vector doesn't have to splat) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1 == 0) @@ -1974,23 +2048,30 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (N1IsConst && ConstValue1 == 1 && IsFullSplat) return N0; // fold (mul x, -1) -> 0-x - if (N1IsConst && ConstValue1.isAllOnesValue()) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), N0); + if (N1IsConst && ConstValue1.isAllOnesValue()) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), N0); + } // fold (mul x, (1 << c)) -> x << c - if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getConstant(ConstValue1.logBase2(), + if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() && + IsFullSplat) { + SDLoc DL(N); + return DAG.getNode(ISD::SHL, DL, VT, N0, + DAG.getConstant(ConstValue1.logBase2(), DL, getShiftAmountTy(N0.getValueType()))); + } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { + if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() && + IsFullSplat) { unsigned Log2Val = (-ConstValue1).logBase2(); + SDLoc DL(N); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. 
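// The two folds above strength-reduce multiplies by powers of two into
// shifts. A minimal standalone sketch of the same identities on plain
// integers (an illustration only, not part of the patch; the helper names
// are made up, and c must stay below the bit width):
#include <cstdint>
inline uint32_t mul_pow2(uint32_t x, unsigned c) {
  return x << c;                // mul x, (1 << c)  ->  shl x, c
}
inline uint32_t mul_neg_pow2(uint32_t x, unsigned c) {
  return 0u - (x << c);         // mul x, -(1 << c) ->  sub 0, (shl x, c)
}
// e.g. mul_pow2(5, 3) == 40, and mul_neg_pow2(5, 3) == uint32_t(-40).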
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), - DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getConstant(Log2Val, + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), + DAG.getNode(ISD::SHL, DL, VT, N0, + DAG.getConstant(Log2Val, DL, getShiftAmountTy(N0.getValueType())))); } @@ -2041,8 +2122,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0.getOperand(1), N1)); // reassociate mul - SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1); - if (RMUL.getNode()) + if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) return RMUL; return SDValue(); @@ -2051,26 +2131,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (sdiv c1, c2) -> c1/c2 - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C); // fold (sdiv X, 1) -> X - if (N1C && N1C->getAPIntValue() == 1LL) + if (N1C && N1C->isOne()) return N0; // fold (sdiv X, -1) -> 0-X - if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), N0); + if (N1C && N1C->isAllOnesValue()) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), N0); + } // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 if (!VT.isVector()) { @@ -2080,8 +2161,9 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { } // fold (sdiv X, pow2) -> simple ops after legalize - if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() || - (-N1C->getAPIntValue()).isPowerOf2())) { + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. if (TLI.isPow2SDivCheap()) @@ -2093,24 +2175,26 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return Res; unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); + SDLoc DL(N); // Splat the sign bit into the register SDValue SGN = - DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, - DAG.getConstant(VT.getScalarSizeInBits() - 1, + DAG.getNode(ISD::SRA, DL, VT, N0, + DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); AddToWorklist(SGN.getNode()); // Add (N0 < 0) ? 
abs2 - 1 : 0; SDValue SRL = - DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, - DAG.getConstant(VT.getScalarSizeInBits() - lg2, + DAG.getNode(ISD::SRL, DL, VT, SGN, + DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL, getShiftAmountTy(SGN.getValueType()))); - SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); + SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL); AddToWorklist(SRL.getNode()); AddToWorklist(ADD.getNode()); // Divide by pow2 - SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD, - DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); + SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD, + DAG.getConstant(lg2, DL, + getShiftAmountTy(ADD.getValueType()))); // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. @@ -2118,10 +2202,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return SRA; AddToWorklist(SRA.getNode()); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); } - // if integer divide is expensive and we satisfy the requirements, emit an + // If integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. if (N1C && !TLI.isIntDivCheap()) { SDValue Op = BuildSDIV(N); @@ -2130,7 +2214,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2141,36 +2225,40 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (udiv c1, c2) -> c1/c2 - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT, + N0C, N1C)) + return Folded; // fold (udiv x, (1 << c)) -> x >>u c - if (N1C && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, - DAG.getConstant(N1C->getAPIntValue().logBase2(), + if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) { + SDLoc DL(N); + return DAG.getNode(ISD::SRL, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, getShiftAmountTy(N0.getValueType()))); + } // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { EVT ADDVT = N1.getOperand(1).getValueType(); - SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT, + SDLoc DL(N); + SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() .logBase2(), - ADDVT)); + DL, ADDVT)); AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); + return DAG.getNode(ISD::SRL, DL, VT, N0, Add); } } } @@ -2182,7 +2270,7 @@ SDValue 
DAGCombiner::visitUDIV(SDNode *N) { // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2193,13 +2281,15 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (srem c1, c2) -> c1%c2 - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT, + N0C, N1C)) + return Folded; // If we know the sign bits of both operands are zero, strength reduce to a // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 if (!VT.isVector()) { @@ -2224,7 +2314,7 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { // undef % X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X % undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2235,27 +2325,33 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (urem c1, c2) -> c1%c2 - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT, + N0C, N1C)) + return Folded; // fold (urem x, pow2) -> (and x, pow2-1) - if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, - DAG.getConstant(N1C->getAPIntValue()-1,VT)); + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + N1C->getAPIntValue().isPowerOf2()) { + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); + } // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { + SDLoc DL(N); SDValue Add = - DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), + DAG.getNode(ISD::ADD, DL, VT, N1, + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT)); AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); } } } @@ -2277,7 +2373,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { // undef % X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X % undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2288,21 +2384,23 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = 
N->getValueType(0); SDLoc DL(N); // fold (mulhs x, 0) -> 0 - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N1; // fold (mulhs x, 1) -> (sra x, size(x)-1) - if (N1C && N1C->getAPIntValue() == 1) - return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0, + if (isOneConstant(N1)) { + SDLoc DL(N); + return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, DAG.getConstant(N0.getValueType().getSizeInBits() - 1, + DL, getShiftAmountTy(N0.getValueType()))); + } // fold (mulhs x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // If the type twice as wide is legal, transform the mulhs to a wider multiply // plus a shift. @@ -2315,7 +2413,8 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } @@ -2326,19 +2425,18 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); SDLoc DL(N); // fold (mulhu x, 0) -> 0 - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N1; // fold (mulhu x, 1) -> 0 - if (N1C && N1C->getAPIntValue() == 1) - return DAG.getConstant(0, N0.getValueType()); + if (isOneConstant(N1)) + return DAG.getConstant(0, DL, N0.getValueType()); // fold (mulhu x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. @@ -2351,7 +2449,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } @@ -2417,8 +2516,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); - // If the type twice as wide is legal, transform the mulhu to a wider multiply - // plus a shift. + // If the type twice as wide is legal, transform the mulhu to a wider + // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); @@ -2429,7 +2528,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); @@ -2447,8 +2547,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); - // If the type twice as wide is legal, transform the mulhu to a wider multiply - // plus a shift. 
+ // If the type twice as wide is legal, transform the mulhu to a wider + // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); @@ -2459,7 +2559,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); @@ -2615,7 +2716,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // build vector of all zeros that might be illegal at this stage. if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { if (!LegalTypes) - ShOp = DAG.getConstant(0, VT); + ShOp = DAG.getConstant(0, SDLoc(N), VT); else ShOp = SDValue(); } @@ -2636,7 +2737,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { ShOp = N0->getOperand(0); if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { if (!LegalTypes) - ShOp = DAG.getConstant(0, VT); + ShOp = DAG.getConstant(0, SDLoc(N), VT); else ShOp = SDValue(); } @@ -2657,19 +2758,122 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { return SDValue(); } +/// This contains all DAGCombine rules which reduce two values combined by +/// an And operation to a single value. This makes them reusable in the context +/// of visitSELECT(). Rules involving constants are not included as +/// visitSELECT() already handles those cases. +SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, + SDNode *LocReference) { + EVT VT = N1.getValueType(); + + // fold (and x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, SDLoc(LocReference), VT); + // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) + if (isNullConstant(LR) && Op1 == ISD::SETEQ) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + if (isAllOnesConstant(LR)) { + // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) + if (Op1 == ISD::SETEQ) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) + if (Op1 == ISD::SETGT) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + } + } + // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) + if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && + Op0 == Op1 && LL.getValueType().isInteger() && + Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || 
(isAllOnesConstant(LR) && isNullConstant(RR)))) { + SDLoc DL(N0); + SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), + LL, DAG.getConstant(1, DL, + LL.getValueType())); + AddToWorklist(ADDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, + DAG.getConstant(2, DL, LL.getValueType()), + ISD::SETUGE); + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + getSetCCResultType(N0.getSimpleValueType()))))) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } + } + + if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && + VT.getSizeInBits() <= 64) { + if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + APInt ADDC = ADDI->getAPIntValue(); + if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal + // immediate for an add, but it is legal if its top c2 bits are set, + // transform the ADD so the immediate doesn't need to be materialized + // in a register. + if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), + SRLI->getZExtValue()); + if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { + ADDC |= Mask; + if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + SDLoc DL(N0); + SDValue NewAdd = + DAG.getNode(ISD::ADD, DL, VT, + N0.getOperand(0), DAG.getConstant(ADDC, DL, VT)); + CombineTo(N0.getNode(), NewAdd); + // Return N so it doesn't get rechecked! 
+ return SDValue(LocReference, 0); + } + } + } + } + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LL, LR, RL, RR, CC0, CC1; - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N1.getValueType(); - unsigned BitWidth = VT.getScalarType().getSizeInBits(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (and x, 0) -> 0, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -2677,13 +2881,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.getConstant( APInt::getNullValue( N0.getValueType().getScalarType().getSizeInBits()), - N0.getValueType()); + SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant( APInt::getNullValue( N1.getValueType().getScalarType().getSizeInBits()), - N1.getValueType()); + SDLoc(N), N1.getValueType()); // fold (and x, -1) -> x, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) @@ -2692,25 +2896,25 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return N0; } - // fold (and x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); // fold (and c1, c2) -> c1&c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x - if (N1C && N1C->isAllOnesValue()) + if (isAllOnesConstant(N1)) return N0; // if (and x, c) is known to be zero, return 0 + unsigned BitWidth = VT.getScalarType().getSizeInBits(); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(BitWidth))) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // reassociate and - SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); - if (RAND.getNode()) + if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) return RAND; // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) @@ -2840,117 +3044,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! 
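// A quick standalone check of the (and (or x, C), D) -> D fold referenced
// above (an illustration only, not part of the patch): it is valid exactly
// when C covers every bit of D, i.e. (C & D) == D.
#include <cstdint>
inline bool and_or_fold_holds(uint32_t x, uint32_t C, uint32_t D) {
  if ((C & D) != D)
    return true;                // precondition not met, fold is not applied
  return ((x | C) & D) == D;    // with C covering D, this holds for every x
}
// e.g. C = 0xFF, D = 0x0F: (x | 0xFF) & 0x0F == 0x0F regardless of x.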
} } - // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) - if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ - ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); - ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); - - if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && - LL.getValueType().isInteger()) { - // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) - if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) - if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); - } - // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) - if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - } - // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) - if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && - Op0 == Op1 && LL.getValueType().isInteger() && - Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && - cast<ConstantSDNode>(RR)->isAllOnesValue()) || - (cast<ConstantSDNode>(LR)->isAllOnesValue() && - cast<ConstantSDNode>(RR)->isNullValue()))) { - SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), - LL, DAG.getConstant(1, LL.getValueType())); - AddToWorklist(ADDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ADDNode, - DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); - } - // canonicalize equivalent to ll == rl - if (LL == RR && LR == RL) { - Op1 = ISD::getSetCCSwappedOperands(Op1); - std::swap(RL, RR); - } - if (LL == RL && LR == RR) { - bool isInteger = LL.getValueType().isInteger(); - ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); - if (Result != ISD::SETCC_INVALID && - (!LegalOperations || - (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getSimpleValueType()))))) - return DAG.getSetCC(SDLoc(N), N0.getValueType(), - LL, LR, Result); - } - } - - // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } - - // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) - // fold (and (sra)) -> (and (srl)) when possible. - if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) - return SDValue(N, 0); - - // fold (zext_inreg (extload x)) -> (zextload x) - if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT MemVT = LN0->getMemoryVT(); - // If we zero all the possible extended bits, then we can turn this into - // a zextload if we are running before legalize or the operation is legal. 
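// The zext_inreg rewrites being moved here turn "extending load + mask"
// into a single zero-extending load. A standalone sketch of why the mask
// makes the original extension kind irrelevant (an illustration only, not
// part of the patch; an i8 load widened to 32 bits is shown):
#include <cstdint>
inline uint32_t load_then_mask(const int8_t *p) {
  int32_t ext = *p;             // sextload/extload-style extension
  return uint32_t(ext) & 0xFFu; // masking the high bits == zextload i8
}
// For *p == -1 (bit pattern 0xFF), ext is 0xFFFFFFFF and the AND yields
// 0xFF, exactly what a zextload would produce.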
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); - if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && - ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } - // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use - if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - N0.hasOneUse()) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT MemVT = LN0->getMemoryVT(); - // If we zero all the possible extended bits, then we can turn this into - // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); - if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && - ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) @@ -3001,8 +3094,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { unsigned LVTStoreBytes = LoadedVT.getStoreSize(); unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; - NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, - NewPtr, DAG.getConstant(PtrOff, PtrType)); + SDLoc DL(LN0); + NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, + NewPtr, DAG.getConstant(PtrOff, DL, PtrType)); Alignment = MinAlign(Alignment, PtrOff); } @@ -3022,33 +3116,60 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } - if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && - VT.getSizeInBits() <= 64) { - if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - APInt ADDC = ADDI->getAPIntValue(); - if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { - // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal - // immediate for an add, but it is legal if its top c2 bits are set, - // transform the ADD so the immediate doesn't need to be materialized - // in a register. - if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { - APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), - SRLI->getZExtValue()); - if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { - ADDC |= Mask; - if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { - SDValue NewAdd = - DAG.getNode(ISD::ADD, SDLoc(N0), VT, - N0.getOperand(0), DAG.getConstant(ADDC, VT)); - CombineTo(N0.getNode(), NewAdd); - return SDValue(N, 0); // Return N so it doesn't get rechecked! 
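// The transform being removed here (re-added in visitANDLike above) widens
// an add constant into its don't-care bits so it becomes a legal immediate.
// A standalone sketch of why that is sound (an illustration only, not part
// of the patch): the (srl y, c2) operand zeroes the top c2 bits of the AND
// result, and carries in the add only travel upward, so those bits of the
// constant cannot affect the result.
#include <cstdint>
inline bool top_bits_dont_care(uint32_t x, uint32_t y,
                               uint32_t c1, unsigned c2) {
  uint32_t hi = ~0u << (32 - c2);  // the top c2 bits; assumes 1 <= c2 <= 31
  return ((x + c1) & (y >> c2)) == ((x + (c1 | hi)) & (y >> c2));
}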
- } - } - } - } - } + if (SDValue Combined = visitANDLike(N0, N1, N)) + return Combined; + + // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; } + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) + // fold (and (sra)) -> (and (srl)) when possible. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (zext_inreg (extload x)) -> (zextload x) + if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use + if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), @@ -3159,9 +3280,12 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, } SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); - if (OpSizeInBits > 16) - Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res, - DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT))); + if (OpSizeInBits > 16) { + SDLoc DL(N); + Res = DAG.getNode(ISD::SRL, DL, VT, Res, + DAG.getConstant(OpSizeInBits - 16, DL, + getShiftAmountTy(VT))); + } return Res; } @@ -3299,33 +3423,125 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) return SDValue(); - SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, - SDValue(Parts[0],0)); + SDLoc DL(N); + SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, + SDValue(Parts[0], 0)); // Result of the bswap should be rotated by 16. If it's not legal, then // do (x << 16) | (x >> 16). 
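// MatchBSwapHWord, shown here, builds a 32-bit halfword swap as a bswap
// followed by a rotate of 16, with (x << 16) | (x >> 16) as the fallback
// when neither rotate is legal. A standalone sketch of that fallback (an
// illustration only, not part of the patch):
#include <cstdint>
inline uint32_t rotl16(uint32_t v) {
  return (v << 16) | (v >> 16); // same result as ROTL or ROTR by 16
}
// e.g. rotl16(0xAABBCCDDu) == 0xCCDDAABBu.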
- SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); + SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT)); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) - return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt); + return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt); if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) - return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt); - return DAG.getNode(ISD::OR, SDLoc(N), VT, - DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt), - DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt)); + return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); + return DAG.getNode(ISD::OR, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt), + DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt)); +} + +/// This contains all DAGCombine rules which reduce two values combined by +/// an Or operation to a single value \see visitANDLike(). +SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { + EVT VT = N1.getValueType(); + // fold (or x, undef) -> -1 + if (!LegalOperations && + (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), + SDLoc(LocReference), VT); + } + // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) { + // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) + // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) + if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) + // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) + if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + getSetCCResultType(N0.getValueType()))))) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } + } + + // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + // We can only do this xform if we know that bits from X that are set in C2 + // but not in C1 are already zero. Likewise for Y. 
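// A standalone check of the mask-merging fold implemented next (an
// illustration only, not part of the patch): when no bit of X lies in C2
// but outside C1, and no bit of Y lies in C1 but outside C2, the two ANDs
// can share one combined mask.
#include <cstdint>
inline bool mask_merge_holds(uint32_t X, uint32_t Y,
                             uint32_t C1, uint32_t C2) {
  if ((X & (C2 & ~C1)) != 0 || (Y & (C1 & ~C2)) != 0)
    return true;                // precondition not met, fold is not applied
  return ((X & C1) | (Y & C2)) == ((X | Y) & (C1 | C2));
}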
+ if (const ConstantSDNode *N0O1C = + getAsNonOpaqueConstant(N0.getOperand(1))) { + if (const ConstantSDNode *N1O1C = + getAsNonOpaqueConstant(N1.getOperand(1))) { + // We can only do this xform if we know that bits from X that are set in + // C2 but not in C1 are already zero. Likewise for Y. + const APInt &LHSMask = N0O1C->getAPIntValue(); + const APInt &RHSMask = N1O1C->getAPIntValue(); + + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(0), N1.getOperand(0)); + SDLoc DL(LocReference); + return DAG.getNode(ISD::AND, DL, VT, X, + DAG.getConstant(LHSMask | RHSMask, DL, VT)); + } + } + } + } + + // (or (and X, M), (and X, N)) -> (and X, (or M, N)) + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(0) == N1.getOperand(0) && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(1), N1.getOperand(1)); + return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X); + } + + return SDValue(); } SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LL, LR, RL, RR, CC0, CC1; - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N1.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (or x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -3339,13 +3555,13 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return DAG.getConstant( APInt::getAllOnesValue( N0.getValueType().getScalarType().getSizeInBits()), - N0.getValueType()); + SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllOnes(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant( APInt::getAllOnesValue( N1.getValueType().getScalarType().getSizeInBits()), - N1.getValueType()); + SDLoc(N), N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) @@ -3404,28 +3620,28 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } - // fold (or x, undef) -> -1 - if (!LegalOperations && - (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { - EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; - return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); - } // fold (or c1, c2) -> c1|c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold (or x, 0) -> x - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N0; // fold (or x, -1) -> -1 - if (N1C && N1C->isAllOnesValue()) + if (isAllOnesConstant(N1)) return N1; // fold (or x, c) -> c iff (x & ~c) == 0 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) return N1; + if (SDValue Combined = visitORLike(N0, N1, N)) + return Combined; + // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) SDValue BSwap = MatchBSwapHWord(N, N0, N1); if (BSwap.getNode()) @@ -3435,8 +3651,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return BSwap; // reassociate or - SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); - if (ROR.getNode()) + if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) // iff (c1 & c2) == 0. @@ -3444,86 +3659,20 @@ SDValue DAGCombiner::visitOR(SDNode *N) { isa<ConstantSDNode>(N0.getOperand(1))) { ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { - if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)) + if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, + N1C, C1)) return DAG.getNode( ISD::AND, SDLoc(N), VT, DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); return SDValue(); } } - // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) - if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ - ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); - ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); - - if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && - LL.getValueType().isInteger()) { - // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) - // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) - if (cast<ConstantSDNode>(LR)->isNullValue() && - (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) - // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) - if (cast<ConstantSDNode>(LR)->isAllOnesValue() && - (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); - } - } - // canonicalize equivalent to ll == rl - if (LL == RR && LR == RL) { - Op1 = ISD::getSetCCSwappedOperands(Op1); - std::swap(RL, RR); - } - if (LL == RL && LR == RR) { - bool isInteger = LL.getValueType().isInteger(); - ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); - if (Result != ISD::SETCC_INVALID && - (!LegalOperations || - (TLI.isCondCodeLegal(Result, 
LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getValueType()))))) - return DAG.getSetCC(SDLoc(N), N0.getValueType(), - LL, LR, Result); - } - } - // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) if (N0.getOpcode() == N1.getOpcode()) { SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); if (Tmp.getNode()) return Tmp; } - // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. - if (N0.getOpcode() == ISD::AND && - N1.getOpcode() == ISD::AND && - N0.getOperand(1).getOpcode() == ISD::Constant && - N1.getOperand(1).getOpcode() == ISD::Constant && - // Don't increase # computations. - (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { - // We can only do this xform if we know that bits from X that are set in C2 - // but not in C1 are already zero. Likewise for Y. - const APInt &LHSMask = - cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - const APInt &RHSMask = - cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); - - if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && - DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { - SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1.getOperand(0)); - return DAG.getNode(ISD::AND, SDLoc(N), VT, X, - DAG.getConstant(LHSMask | RHSMask, VT)); - } - } - // See if this is some rotate idiom. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) return SDValue(Rot, 0); @@ -3751,7 +3900,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; } - Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT)); + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT)); } return Rot.getNode(); @@ -3793,15 +3942,12 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LHS, RHS, CC; - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (xor x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -3812,27 +3958,30 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 
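// MatchRotate, used just above, recognizes shift pairs that implement a
// rotate. A standalone sketch of the basic idiom it matches (an
// illustration only, not part of the patch; c must stay in 1..31 so that
// neither shift is by the full width):
#include <cstdint>
inline uint32_t rot_idiom(uint32_t x, unsigned c) {
  return (x << c) | (x >> (32 - c)); // (or (shl x, c), (srl x, 32 - c))
}
// e.g. rot_idiom(0x80000001u, 1) == 0x00000003u, a left-rotate by one.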
if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (xor x, undef) -> undef if (N0.getOpcode() == ISD::UNDEF) return N0; if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (xor c1, c2) -> c1^c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold (xor x, 0) -> x - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N0; // reassociate xor - SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1); - if (RXOR.getNode()) + if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1)) return RXOR; // fold !(x cc y) -> (x !cc y) + SDValue LHS, RHS, CC; if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { bool isInt = LHS.getValueType().isInteger(); ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), @@ -3853,18 +4002,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) - if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND && + if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND && N0.getNode()->hasOneUse() && isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ SDValue V = N0.getOperand(0); - V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V, - DAG.getConstant(1, V.getValueType())); + SDLoc DL(N0); + V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V, + DAG.getConstant(1, DL, V.getValueType())); AddToWorklist(V.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc - if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 && + if (isOneConstant(N1) && VT == MVT::i1 && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { @@ -3876,7 +4026,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants - if (N1C && N1C->isAllOnesValue() && + if (isAllOnesConstant(N1) && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { @@ -3897,21 +4047,48 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) if (N1C && N0.getOpcode() == ISD::XOR) { - ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0)); - ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (N00C) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1), + if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), DAG.getConstant(N1C->getAPIntValue() ^ - N00C->getAPIntValue(), VT)); - if (N01C) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0), + N00C->getAPIntValue(), DL, VT)); + } + if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, 
N0.getOperand(0), DAG.getConstant(N1C->getAPIntValue() ^ - N01C->getAPIntValue(), VT)); + N01C->getAPIntValue(), DL, VT)); + } } // fold (xor x, x) -> 0 if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); + // fold (xor (shl 1, x), -1) -> (rotl ~1, x) + // Here is a concrete example of this equivalence: + // i16 x == 14 + // i16 shl == 1 << 14 == 16384 == 0b0100000000000000 + // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 + // + // => + // + // i16 ~1 == 0b1111111111111110 + // i16 rol(~1, 14) == 0b1011111111111111 + // + // Some additional tips to help conceptualize this transform: + // - Try to see the operation as placing a single zero in a value of all ones. + // - There exists no value for x which would allow the result to contain zero. + // - Values of x larger than the bitwidth are undefined and do not require a + // consistent result. + // - Pushing the zero left requires shifting one bits in from the right. + // A rotate left of ~1 is a nice way of achieving the desired result. + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL + && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), + N0.getOperand(1)); + } + // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) { SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); @@ -3929,10 +4106,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { /// Handle transforms common to the three shifts, when the shift amount is a /// constant. SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { - // We can't and shouldn't fold opaque constants. - if (Amt->isOpaque()) - return SDValue(); - SDNode *LHS = N->getOperand(0).getNode(); if (!LHS->hasOneUse()) return SDValue(); @@ -3959,8 +4132,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { } // We require the RHS of the binop to be a constant and not opaque as well. - ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); - if (!BinOpCst || BinOpCst->isOpaque()) return SDValue(); + ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); + if (!BinOpCst) return SDValue(); // FIXME: disable this unless the input to the binop is a shift by a constant. 
// If it is not a shift, it pessimizes some common cases like: @@ -4013,14 +4186,17 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { SDValue N01 = N->getOperand(0).getOperand(1); if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { - EVT TruncVT = N->getValueType(0); - SDValue N00 = N->getOperand(0).getOperand(0); - APInt TruncC = N01C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); + if (!N01C->isOpaque()) { + EVT TruncVT = N->getValueType(0); + SDValue N00 = N->getOperand(0).getOperand(0); + APInt TruncC = N01C->getAPIntValue(); + TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); + SDLoc DL(N); - return DAG.getNode(ISD::AND, SDLoc(N), TruncVT, - DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00), - DAG.getConstant(TruncC, TruncVT)); + return DAG.getNode(ISD::AND, DL, TruncVT, + DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00), + DAG.getConstant(TruncC, DL, TruncVT)); + } } } @@ -4042,15 +4218,14 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); // If setcc produces all-one true value then: @@ -4064,7 +4239,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && TLI.getBooleanContents(N00.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) { - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV)) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, + N01CV, N1CV)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); } } else { @@ -4074,10 +4250,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } // fold (shl c1, c2) -> c1<<c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); // fold (shl 0, x) -> 0 - if (N0C && N0C->isNullValue()) + if (isNullConstant(N0)) return N0; // fold (shl x, c >= size(x)) -> undef if (N1C && N1C->getZExtValue() >= OpSizeInBits) @@ -4087,11 +4264,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return N0; // fold (shl undef, x) -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 
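// A standalone check that truncation distributes over AND, which is what
// the shift-amount fold named above relies on (an illustration only, not
// part of the patch; a 32-bit value truncated to 16 bits is shown):
#include <cstdint>
inline bool trunc_and_commutes(uint32_t y, uint32_t c) {
  return uint16_t(y & c) == uint16_t(uint16_t(y) & uint16_t(c));
}
// Both sides keep only the low 16 bits of y & c, so this holds for all
// inputs.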
if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { @@ -4108,10 +4285,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { uint64_t c1 = N0C1->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); + SDLoc DL(N); if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, DL, N1.getValueType())); } } @@ -4131,12 +4309,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT InnerShiftVT = N0Op0.getValueType(); uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); if (c2 >= OpSizeInBits - InnerShiftSize) { + SDLoc DL(N0); if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N0), VT, - DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, + DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)), - DAG.getConstant(c1 + c2, N1.getValueType())); + DAG.getConstant(c1 + c2, DL, N1.getValueType())); } } } @@ -4154,8 +4333,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (c1 == c2) { SDValue NewOp0 = N0.getOperand(0); EVT CountVT = NewOp0.getOperand(1).getValueType(); - SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), - NewOp0, DAG.getConstant(c2, CountVT)); + SDLoc DL(N); + SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(), + NewOp0, + DAG.getConstant(c2, DL, CountVT)); AddToWorklist(NewSHL.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); } @@ -4176,25 +4357,30 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue Shift; if (c2 > c1) { Mask = Mask.shl(c2 - c1); - Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c2 - c1, N1.getValueType())); + SDLoc DL(N); + Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(c2 - c1, DL, N1.getValueType())); } else { Mask = Mask.lshr(c1 - c2); - Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 - c2, N1.getValueType())); + SDLoc DL(N); + Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 - c2, DL, N1.getValueType())); } - return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, - DAG.getConstant(Mask, VT)); + SDLoc DL(N0); + return DAG.getNode(ISD::AND, DL, VT, Shift, + DAG.getConstant(Mask, DL, VT)); } } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { unsigned BitSize = VT.getScalarSizeInBits(); + SDLoc DL(N); SDValue HiBitsMask = DAG.getConstant(APInt::getHighBitsSet(BitSize, - BitSize - N1C->getZExtValue()), VT); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), + BitSize - N1C->getZExtValue()), + DL, VT); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask); } @@ -4210,7 +4396,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); } - if (N1C) { + if (N1C && !N1C->isOpaque()) { SDValue NewSHL = visitShiftByConstant(N, N1C); if (NewSHL.getNode()) return NewSHL; @@ -4222,27 +4408,27 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = 
dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N1C = isConstOrConstSplat(N1); } // fold (sra c1, c2) -> c1 >>s c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); // fold (sra 0, x) -> 0 - if (N0C && N0C->isNullValue()) + if (isNullConstant(N0)) return N0; // fold (sra -1, x) -> -1 - if (N0C && N0C->isAllOnesValue()) + if (isAllOnesConstant(N0)) return N0; // fold (sra x, (setge c, size(x))) -> undef if (N1C && N1C->getZExtValue() >= OpSizeInBits) @@ -4270,8 +4456,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); if (Sum >= OpSizeInBits) Sum = OpSizeInBits - 1; - return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(Sum, N1.getValueType())); + SDLoc DL(N); + return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), + DAG.getConstant(Sum, DL, N1.getValueType())); } } @@ -4303,14 +4490,15 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && TLI.isTruncateFree(VT, TruncVT)) { - SDValue Amt = DAG.getConstant(ShiftAmt, - getShiftAmountTy(N0.getOperand(0).getValueType())); - SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT, - N0.getOperand(0), Amt); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT, - Shift); - return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), - N->getValueType(0), Trunc); + SDLoc DL(N); + SDValue Amt = DAG.getConstant(ShiftAmt, DL, + getShiftAmountTy(N0.getOperand(0).getValueType())); + SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, + N0.getOperand(0), Amt); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, + Shift); + return DAG.getNode(ISD::SIGN_EXTEND, DL, + N->getValueType(0), Trunc); } } } @@ -4337,12 +4525,13 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT LargeVT = N0Op0.getValueType(); if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) { + SDLoc DL(N); SDValue Amt = - DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), + DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL, getShiftAmountTy(N0Op0.getOperand(0).getValueType())); - SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, + SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); + return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); } } } @@ -4356,7 +4545,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); - if (N1C) { + if (N1C && !N1C->isOpaque()) { SDValue NewSRA = visitShiftByConstant(N, N1C); if (NewSRA.getNode()) return NewSRA; @@ -4368,24 +4557,24 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { - SDValue
FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N1C = isConstOrConstSplat(N1); } // fold (srl c1, c2) -> c1 >>u c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); // fold (srl 0, x) -> 0 - if (N0C && N0C->isNullValue()) + if (isNullConstant(N0)) return N0; // fold (srl x, c >= size(x)) -> undef if (N1C && N1C->getZExtValue() >= OpSizeInBits) @@ -4396,17 +4585,18 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // if (srl x, c) is known to be zero, return 0 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRL) { if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) { uint64_t c1 = N01C->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); + SDLoc DL(N); if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, DL, N1.getValueType())); } } @@ -4422,12 +4612,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); // This is only valid if the OpSizeInBits + c1 = size of inner shift. if (c1 + OpSizeInBits == InnerShiftSize) { + SDLoc DL(N0); if (c1 + c2 >= InnerShiftSize) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, - DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT, + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(ISD::SRL, DL, InnerShiftVT, N0.getOperand(0)->getOperand(0), - DAG.getConstant(c1 + c2, ShiftCountVT))); + DAG.getConstant(c1 + c2, DL, + ShiftCountVT))); } } @@ -4436,8 +4628,9 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { unsigned BitSize = N0.getScalarValueSizeInBits(); if (BitSize <= 64) { uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(~0ULL >> ShAmt, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), + DAG.getConstant(~0ULL >> ShAmt, DL, VT)); } } @@ -4451,14 +4644,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { uint64_t ShiftAmt = N1C->getZExtValue(); - SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT, + SDLoc DL0(N0); + SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0), - DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); + DAG.getConstant(ShiftAmt, DL0, + getShiftAmountTy(SmallVT))); AddToWorklist(SmallShift.getNode()); APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), - DAG.getConstant(Mask, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift), + DAG.getConstant(Mask, DL, VT)); } } @@ -4477,12 +4673,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // If any of the input bits are KnownOne, then the input couldn't be 
all // zeros, thus the result of the srl will always be zero. - if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT); + if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); // If all of the bits input to the ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one. APInt UnknownBits = ~KnownZero; - if (UnknownBits == 0) return DAG.getConstant(1, VT); + if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. if ((UnknownBits & (UnknownBits - 1)) == 0) { @@ -4494,13 +4690,16 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue Op = N0.getOperand(0); if (ShAmt) { - Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op, - DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); + SDLoc DL(N0); + Op = DAG.getNode(ISD::SRL, DL, VT, Op, + DAG.getConstant(ShAmt, DL, + getShiftAmountTy(Op.getValueType()))); AddToWorklist(Op.getNode()); } - return DAG.getNode(ISD::XOR, SDLoc(N), VT, - Op, DAG.getConstant(1, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, + Op, DAG.getConstant(1, DL, VT)); } } @@ -4517,7 +4716,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - if (N1C) { + if (N1C && !N1C->isOpaque()) { SDValue NewSRL = visitShiftByConstant(N, N1C); if (NewSRL.getNode()) return NewSRL; @@ -4651,23 +4850,19 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); EVT VT = N->getValueType(0); EVT VT0 = N0.getValueType(); // fold (select C, X, X) -> X if (N1 == N2) return N1; - // fold (select true, X, Y) -> X - if (N0C && !N0C->isNullValue()) - return N1; - // fold (select false, X, Y) -> Y - if (N0C && N0C->isNullValue()) - return N2; + if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) { + // fold (select true, X, Y) -> X + // fold (select false, X, Y) -> Y + return !N0C->isNullValue() ?
N1 : N2; + } // fold (select C, 1, X) -> (or C, X) - if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) + if (VT == MVT::i1 && isOneConstant(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) // We can't do this reliably if integer based booleans have different contents @@ -4684,40 +4879,43 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { TLI.getBooleanContents(false, true) && TLI.getBooleanContents(false, false) == TargetLowering::ZeroOrOneBooleanContent)) && - N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { + isNullConstant(N1) && isOneConstant(N2)) { SDValue XORNode; - if (VT == VT0) - return DAG.getNode(ISD::XOR, SDLoc(N), VT0, - N0, DAG.getConstant(1, VT0)); - XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0, - N0, DAG.getConstant(1, VT0)); + if (VT == VT0) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT0, + N0, DAG.getConstant(1, DL, VT0)); + } + SDLoc DL0(N0); + XORNode = DAG.getNode(ISD::XOR, DL0, VT0, + N0, DAG.getConstant(1, DL0, VT0)); AddToWorklist(XORNode.getNode()); if (VT.bitsGT(VT0)) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); } // fold (select C, 0, X) -> (and (not C), X) - if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { + if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) - if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { + if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); } // fold (select C, X, 0) -> (and C, X) - if (VT == MVT::i1 && N2C && N2C->isNullValue()) + if (VT == MVT::i1 && isNullConstant(N2)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // fold (select X, X, Y) -> (or X, Y) // fold (select X, 1, Y) -> (or X, Y) - if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1))) + if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) - if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) + if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // If we can fold this based on the true/false value, do so. 
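For the MVT::i1 cases these select folds are plain Boolean identities. An exhaustive two-value check in standalone C++, illustrative only:

#include <cassert>

int main() {
  for (int C = 0; C <= 1; ++C)
    for (int X = 0; X <= 1; ++X) {
      assert((C ? 1 : X) == (C | X));        // (select C, 1, X) -> (or C, X)
      assert((C ? 0 : 1) == (C ^ 1));        // (select C, 0, 1) -> (xor C, 1)
      assert((C ? X : 0) == (C & X));        // (select C, X, 0) -> (and C, X)
      assert((C ? 0 : X) == ((C ^ 1) & X));  // (select C, 0, X) -> (and (not C), X)
      assert((C ? X : 1) == ((C ^ 1) | X));  // (select C, X, 1) -> (or (not C), X)
    }
  return 0;
}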
@@ -4757,6 +4955,69 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SimplifySelect(SDLoc(N), N0, N1, N2); } + if (VT0 == MVT::i1) { + if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + // select (and Cond0, Cond1), X, Y + // -> select Cond0, (select Cond1, X, Y), Y + if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, + InnerSelect, N2); + } + // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) + if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, + InnerSelect); + } + } + + // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y + if (N1->getOpcode() == ISD::SELECT) { + SDValue N1_0 = N1->getOperand(0); + SDValue N1_1 = N1->getOperand(1); + SDValue N1_2 = N1->getOperand(2); + if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { + // Create the actual and node if we can generate good code for it. + if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), + N0, N1_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, + N1_1, N2); + } + // Otherwise see if we can optimize the "and" to a better pattern. + if (SDValue Combined = visitANDLike(N0, N1_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1_1, N2); + } + } + // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y + if (N2->getOpcode() == ISD::SELECT) { + SDValue N2_0 = N2->getOperand(0); + SDValue N2_1 = N2->getOperand(1); + SDValue N2_2 = N2->getOperand(2); + if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { + // Create the actual or node if we can generate good code for it. + if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), + N0, N2_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, + N1, N2_2); + } + // Otherwise see if we can optimize to a better pattern. + if (SDValue Combined = visitORLike(N0, N2_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1, N2_2); + } + } + } + return SDValue(); } @@ -4832,6 +5093,67 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); } +SDValue DAGCombiner::visitMSCATTER(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); + SDValue Mask = MSC->getMask(); + SDValue Data = MSC->getValue(); + SDLoc DL(N); + + // If the MSCATTER data type requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + if (Mask.getOpcode() != ISD::SETCC) + return SDValue(); + + // Check if any splitting is required. 
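The select-of-select restructuring just introduced rests on two identities; a quick standalone C++ property check, with an invented sel helper standing in for ISD::SELECT:

#include <cassert>

int sel(bool c, int x, int y) { return c ? x : y; } // stand-in for ISD::SELECT

int main() {
  const int X = 7, Y = 9;
  for (int a = 0; a <= 1; ++a)
    for (int b = 0; b <= 1; ++b) {
      // select (and a, b), X, Y == select a, (select b, X, Y), Y
      assert(sel(a && b, X, Y) == sel(a, sel(b, X, Y), Y));
      // select (or a, b), X, Y == select a, X, (select b, X, Y)
      assert(sel(a || b, X, Y) == sel(a, X, sel(b, X, Y)));
    }
  return 0;
}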
+ if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != + TargetLowering::TypeSplitVector) + return SDValue(); + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0)); + + SDValue Chain = MSC->getChain(); + + EVT MemoryVT = MSC->getMemoryVT(); + unsigned Alignment = MSC->getOriginalAlignment(); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + SDValue BasePtr = MSC->getBasePtr(); + SDValue IndexLo, IndexHi; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MSC->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, MSC->getAAInfo(), MSC->getRanges()); + + SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo }; + Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), + DL, OpsLo, MMO); + + SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi}; + Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), + DL, OpsHi, MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} + SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (Level >= AfterLegalizeTypes) @@ -4878,7 +5200,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), + getMachineMemOperand(MST->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, MST->getAAInfo(), MST->getRanges()); @@ -4887,10 +5209,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), + getMachineMemOperand(MST->getPointerInfo(), MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, MST->getAAInfo(), MST->getRanges()); @@ -4906,6 +5228,83 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitMGATHER(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N); + SDValue Mask = MGT->getMask(); + SDLoc DL(N); + + // If the MGATHER result requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + + if (Mask.getOpcode() != ISD::SETCC) + return SDValue(); + + EVT VT = N->getValueType(0); + + // Check if any splitting is required. 
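Splitting the masked scatter into lo and hi halves is semantics-preserving because the lanes are independent; a scalar model of that claim in standalone C++ (the scatter helper is a made-up emulation, not the MaskedScatterSDNode):

#include <cassert>
#include <cstddef>

// Scalar model of a masked scatter: mem[index[i]] = data[i] when mask[i].
void scatter(int *mem, const int *data, const int *index,
             const bool *mask, size_t lo, size_t hi) {
  for (size_t i = lo; i < hi; ++i)
    if (mask[i]) mem[index[i]] = data[i];
}

int main() {
  int whole[8] = {0}, split[8] = {0};
  const int data[4] = {1, 2, 3, 4}, index[4] = {5, 1, 7, 2};
  const bool mask[4] = {true, false, true, true};
  scatter(whole, data, index, mask, 0, 4); // one wide scatter
  scatter(split, data, index, mask, 0, 2); // lo half...
  scatter(split, data, index, mask, 2, 4); // ...then hi half
  for (int i = 0; i < 8; ++i) assert(whole[i] == split[i]);
  return 0;
}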
+ if (TLI.getTypeAction(*DAG.getContext(), VT) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + SDValue Src0 = MGT->getValue(); + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); + + SDValue Chain = MGT->getChain(); + EVT MemoryVT = MGT->getMemoryVT(); + unsigned Alignment = MGT->getOriginalAlignment(); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue BasePtr = MGT->getBasePtr(); + SDValue Index = MGT->getIndex(); + SDValue IndexLo, IndexHi; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), MGT->getRanges()); + + SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo }; + Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, + MMO); + + SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi}; + Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, + MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); + + SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + + SDValue RetOps[] = { GatherRes, Chain }; + return DAG.getMergeValues(RetOps, DL); +} + SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (Level >= AfterLegalizeTypes) @@ -4953,7 +5352,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), + getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); @@ -4962,10 +5361,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), + getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); @@ -5021,7 +5420,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { EVT VT = LHS.getValueType(); SDValue Shift = DAG.getNode( ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT)); + DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT)); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -5029,6 +5428,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { } } + if (SimplifySelectOps(N, N1, N2)) + return SDValue(N, 0); // Don't revisit N. + // If the VSELECT result requires splitting and the mask is provided by a // SETCC, then split both nodes and its operands before legalization. 
This // prevents the type legalizer from unrolling SETCC into scalar comparisons @@ -5141,7 +5543,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, EVT VT = N->getValueType(0); assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || - Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); + Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) + && "Expected EXTEND dag node in input!"); // fold (sext c1) -> c1 // fold (zext c1) -> c1 @@ -5163,7 +5566,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); unsigned ShAmt = VTBits - EVTBits; SmallVector<SDValue, 8> Elts; - unsigned NumElts = N0->getNumOperands(); + unsigned NumElts = VT.getVectorNumElements(); SDLoc DL(N); for (unsigned i=0; i != NumElts; ++i) { @@ -5173,14 +5576,15 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, continue; } + SDLoc DL(Op); ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); - if (Opcode == ISD::SIGN_EXTEND) + if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), - SVT)); + DL, SVT)); else Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(), - SVT)); + DL, SVT)); } return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); @@ -5271,6 +5675,102 @@ void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, } } +// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?). +SDValue DAGCombiner::CombineExtLoad(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT DstVT = N->getValueType(0); + EVT SrcVT = N0.getValueType(); + + assert((N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND) && + "Unexpected node type (not an extend)!"); + + // fold (sext (load x)) to multiple smaller sextloads; same for zext. + // For example, on a target with legal v4i32, but illegal v8i32, turn: + // (v8i32 (sext (v8i16 (load x)))) + // into: + // (v8i32 (concat_vectors (v4i32 (sextload x)), + // (v4i32 (sextload (x + 16))))) + // Where uses of the original load, i.e.: + // (v8i16 (load x)) + // are replaced with: + // (v8i16 (truncate + // (v8i32 (concat_vectors (v4i32 (sextload x)), + // (v4i32 (sextload (x + 16))))))) + // + // This combine is only applicable to illegal, but splittable, vectors. + // All legal types, and illegal non-vector types, are handled elsewhere. + // This combine is controlled by TargetLowering::isVectorLoadExtDesirable. + // + if (N0->getOpcode() != ISD::LOAD) + return SDValue(); + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + + if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || + !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || + !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + return SDValue(); + + SmallVector<SDNode *, 4> SetCCs; + if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI)) + return SDValue(); + + ISD::LoadExtType ExtType = + N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + + // Try to split the vector types to get down to legal types. 
+ EVT SplitSrcVT = SrcVT; + EVT SplitDstVT = DstVT; + while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) && + SplitSrcVT.getVectorNumElements() > 1) { + SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first; + SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first; + } + + if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) + return SDValue(); + + SDLoc DL(N); + const unsigned NumSplits = + DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); + const unsigned Stride = SplitSrcVT.getStoreSize(); + SmallVector<SDValue, 4> Loads; + SmallVector<SDValue, 4> Chains; + + SDValue BasePtr = LN0->getBasePtr(); + for (unsigned Idx = 0; Idx < NumSplits; Idx++) { + const unsigned Offset = Idx * Stride; + const unsigned Align = MinAlign(LN0->getAlignment(), Offset); + + SDValue SplitLoad = DAG.getExtLoad( + ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, + LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, + LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), + Align, LN0->getAAInfo()); + + BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Stride, DL, BasePtr.getValueType())); + + Loads.push_back(SplitLoad.getValue(0)); + Chains.push_back(SplitLoad.getValue(1)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads); + + CombineTo(N, NewValue); + + // Replace uses of the original load (before extension) + // with a truncate of the concatenated sextloaded vectors. + SDValue Trunc = + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); + CombineTo(N0.getNode(), Trunc, NewChain); + ExtendSetCCUses(SetCCs, Trunc, NewValue, DL, + (ISD::NodeType)N->getOpcode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -5337,17 +5837,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } // fold (sext (load x)) -> (sext (truncate (sextload x))) - // None of the supported targets knows how to perform load and sign extend - // on vectors in one instruction. We only perform this transformation on - // scalars. - if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && - ISD::isUNINDEXEDLoad(N0.getNode()) && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + // Only generate vector extloads when 1) they're legal, and 2) they are + // deemed desirable by the target. + if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + ((!LegalOperations && !VT.isVector() && + !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); + if (VT.isVector()) + DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, @@ -5364,6 +5865,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } + // fold (sext (load x)) to multiple smaller sextloads. + // Only on illegal but splittable vectors. 
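At the value level, CombineExtLoad claims that one wide extending load equals the concatenation of two half-width extending loads at consecutive offsets; the DAG version additionally rewires chains and SETCC users. A standalone C++ sketch of just that equivalence:

#include <cassert>
#include <cstdint>

int main() {
  const int16_t src[8] = {-3, 100, -32768, 32767, 0, -1, 42, -42};
  int32_t wide[8], halves[8];
  // (v8i32 (sext (v8i16 (load x)))) in one step...
  for (int i = 0; i < 8; ++i) wide[i] = src[i];       // sign-extends
  // ...versus two v4i32 sextloads at offsets 0 and 4, then concat.
  for (int i = 0; i < 4; ++i) halves[i] = src[i];
  for (int i = 0; i < 4; ++i) halves[4 + i] = src[4 + i];
  for (int i = 0; i < 8; ++i) assert(wide[i] == halves[i]);
  return 0;
}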
+ if (SDValue ExtLoad = CombineExtLoad(N)) + return ExtLoad; + // fold (sext (sextload x)) -> (sext (truncate (sextload x))) // fold (sext ( extload x)) -> (sext (truncate (sextload x))) if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && @@ -5407,14 +5913,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN0->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.sext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, - ExtLoad, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, + ExtLoad, DAG.getConstant(Mask, DL, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -5457,11 +5964,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0) unsigned ElementWidth = VT.getScalarType().getSizeInBits(); + SDLoc DL(N); SDValue NegOne = - DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); + DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT); SDValue SCC = - SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), - NegOne, DAG.getConstant(0, VT), + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + NegOne, DAG.getConstant(0, DL, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; @@ -5473,7 +5981,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC); return DAG.getSelect(DL, VT, SetCC, - NegOne, DAG.getConstant(0, VT)); + NegOne, DAG.getConstant(0, DL, VT)); } } } @@ -5507,11 +6015,9 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, SDValue Op1 = N->getOperand(1); assert(Op0.getValueType() == Op1.getValueType()); - ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0); - ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); - if (COp0 && COp0->isNullValue()) + if (isNullConstant(Op0)) Op = Op1; - else if (COp1 && COp1->isNullValue()) + else if (isNullConstant(Op1)) Op = Op0; else return false; @@ -5622,22 +6128,24 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - X, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + X, DAG.getConstant(Mask, DL, VT)); } // fold (zext (load x)) -> (zext (truncate (zextload x))) - // None of the supported targets knows how to perform load and vector_zext - // on vectors in one instruction. We only perform this transformation on - // scalars. - if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && - ISD::isUNINDEXEDLoad(N0.getNode()) && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + // Only generate vector extloads when 1) they're legal, and 2) they are + // deemed desirable by the target. 
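The NegOne operand above exploits the fact that sign-extending an i1 true gives an all-ones mask; in two's complement, -(x < y) is exactly the 0/-1 value that select(setcc, -1, 0) produces. Illustrative standalone check:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x = -2; x <= 2; ++x)
    for (int32_t y = -2; y <= 2; ++y) {
      int32_t mask = -static_cast<int32_t>(x < y); // sext of the i1 result
      assert(mask == ((x < y) ? -1 : 0));          // select(setcc), -1, 0
    }
  return 0;
}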
+ if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + ((!LegalOperations && !VT.isVector() && + !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); + if (VT.isVector()) + DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, @@ -5655,6 +6163,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } } + // fold (zext (load x)) to multiple smaller zextloads. + // Only on illegal but splittable vectors. + if (SDValue ExtLoad = CombineExtLoad(N)) + return ExtLoad; + // fold (zext (and/or/xor (load x), cst)) -> // (and/or/xor (zextload x), (zext cst)) if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || @@ -5677,14 +6190,15 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN0->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, - ExtLoad, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, + ExtLoad, DAG.getConstant(Mask, DL, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -5722,19 +6236,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. // Only do this before legalize for now. EVT EltVT = VT.getVectorElementType(); + SDLoc DL(N); SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), - DAG.getConstant(1, EltVT)); + DAG.getConstant(1, DL, EltVT)); if (VT.getSizeInBits() == N0VT.getSizeInBits()) // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. 
- return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), + return DAG.getNode(ISD::AND, DL, VT, + DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()), - DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); // If the desired elements are smaller or larger than the source @@ -5747,18 +6262,19 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT), - DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps)); + return DAG.getNode(ISD::AND, DL, VT, + DAG.getSExtOrTrunc(VsetCC, DL, VT), + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDLoc DL(N); SDValue SCC = - SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, VT), DAG.getConstant(0, VT), + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; } @@ -5850,8 +6366,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - X, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + X, DAG.getConstant(Mask, DL, VT)); } // fold (aext (load x)) -> (aext (truncate (extload x))) @@ -5934,9 +6451,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDLoc DL(N); SDValue SCC = - SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, VT), DAG.getConstant(0, VT), + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; @@ -5957,7 +6475,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { const APInt &CVal = CV->getAPIntValue(); APInt NewVal = CVal & Mask; if (NewVal != CVal) - return DAG.getConstant(NewVal, V.getValueType()); + return DAG.getConstant(NewVal, SDLoc(V), V.getValueType()); break; } case ISD::OR: @@ -5972,7 +6490,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { // Only look at single-use SRLs. if (!V.getNode()->hasOneUse()) break; - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { + if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) { // See if we can recursively simplify the LHS. 
unsigned Amt = RHSC->getZExtValue(); @@ -6117,9 +6635,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); - SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), + SDLoc DL(LN0); + SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, LN0->getBasePtr(), - DAG.getConstant(PtrOff, PtrType)); + DAG.getConstant(PtrOff, DL, PtrType)); AddToWorklist(NewPtr.getNode()); SDValue Load; @@ -6148,11 +6667,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // no larger than the source) then the useful bits of the result are // zero; we can't simply return the shortened shift, because the result // of that operation is undefined. + SDLoc DL(N0); if (ShLeftAmt >= VT.getSizeInBits()) - Result = DAG.getConstant(0, VT); + Result = DAG.getConstant(0, DL, VT); else - Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT, - Result, DAG.getConstant(ShLeftAmt, ShImmTy)); + Result = DAG.getNode(ISD::SHL, DL, VT, + Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy)); } // Return the new loaded value. @@ -6279,7 +6799,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), - Op.getValueType())); + SDLoc(Op), Op.getValueType())); } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts); @@ -6288,6 +6808,20 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + + return SDValue(); +} + SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -6297,7 +6831,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getValueType() == N->getValueType(0)) return N0; // fold (truncate c1) -> c1 - if (isa<ConstantSDNode>(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) @@ -6350,9 +6884,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, N0.getOperand(0)); + SDLoc DL(N); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - SDLoc(N), TrTy, V, - DAG.getConstant(Index, IndexTy)); + DL, TrTy, V, + DAG.getConstant(Index, DL, IndexTy)); } } @@ -6598,13 +7133,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { N0.getOperand(0)); AddToWorklist(NewConv.getNode()); + SDLoc DL(N); APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, - NewConv, DAG.getConstant(SignBit, VT)); + return DAG.getNode(ISD::XOR, DL, VT, + NewConv, DAG.getConstant(SignBit, DL, VT)); assert(N0.getOpcode() == ISD::FABS); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - NewConv, DAG.getConstant(~SignBit, VT)); + return DAG.getNode(ISD::AND, DL, VT, + NewConv, DAG.getConstant(~SignBit, DL, VT)); } // fold (bitconvert (fcopysign cst, x)) -> @@ -6629,9 +7165,11 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { } else if (OrigXWidth > VTWidth) { // To get the sign bit in the right place, we have to shift it right // before 
truncating. - X = DAG.getNode(ISD::SRL, SDLoc(X), + SDLoc DL(X); + X = DAG.getNode(ISD::SRL, DL, X.getValueType(), X, - DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); + DAG.getConstant(OrigXWidth-VTWidth, DL, + X.getValueType())); AddToWorklist(X.getNode()); X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); AddToWorklist(X.getNode()); @@ -6639,13 +7177,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, - X, DAG.getConstant(SignBit, VT)); + X, DAG.getConstant(SignBit, SDLoc(X), VT)); AddToWorklist(X.getNode()); SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, - Cst, DAG.getConstant(~SignBit, VT)); + Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); AddToWorklist(Cst.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); @@ -6659,6 +7197,51 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { return CombineLD; } + // Remove double bitcasts from shuffles - this is often a legacy of + // XformToShuffleWithZero being used to combine bitmaskings (of + // float vectors bitcast to integer vectors) into shuffles. + // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1) + if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() && + N0->getOpcode() == ISD::VECTOR_SHUFFLE && + VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() && + !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0); + + // If operands are a bitcast, peek through if it casts the original VT. + // If operands are a UNDEF or constant, just bitcast back to original VT. + auto PeekThroughBitcast = [&](SDValue Op) { + if (Op.getOpcode() == ISD::BITCAST && + Op.getOperand(0)->getValueType(0) == VT) + return SDValue(Op.getOperand(0)); + if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); + return SDValue(); + }; + + SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); + SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); + if (!(SV0 && SV1)) + return SDValue(); + + int MaskScale = + VT.getVectorNumElements() / N0.getValueType().getVectorNumElements(); + SmallVector<int, 8> NewMask; + for (int M : SVN->getMask()) + for (int i = 0; i != MaskScale; ++i) + NewMask.push_back(M < 0 ? -1 : M * MaskScale + i); + + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); + if (!LegalMask) { + std::swap(SV0, SV1); + ShuffleVectorSDNode::commuteMask(NewMask); + LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); + } + + if (LegalMask) + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); + } + return SDValue(); } @@ -6727,6 +7310,8 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); } + SDLoc DL(BV); + // Okay, we know the src/dst types are both integers of differing types. // Handling growing first. 
assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); @@ -6753,16 +7338,15 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (EltIsUndef) Ops.push_back(DAG.getUNDEF(DstEltVT)); else - Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); + Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } // Finally, this must be the case where we are shrinking elements: each input // turns into multiple outputs. - bool isS2V = ISD::isScalarToVector(BV); unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, NumOutputsPerInput*BV->getNumOperands()); @@ -6770,8 +7354,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { if (BV->getOperand(i).getOpcode() == ISD::UNDEF) { - for (unsigned j = 0; j != NumOutputsPerInput; ++j) - Ops.push_back(DAG.getUNDEF(DstEltVT)); + Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); continue; } @@ -6780,11 +7363,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { for (unsigned j = 0; j != NumOutputsPerInput; ++j) { APInt ThisVal = OpVal.trunc(DstBitSize); - Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); - if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) - // Simply turn this into a SCALAR_TO_VECTOR of the new type. - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, - Ops[0]); + Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); OpVal = OpVal.lshr(DstBitSize); } @@ -6793,7 +7372,450 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); +} + +/// Try to perform FMA combining on a given FADD node. +SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + + const TargetOptions &Options = DAG.getTarget().Options; + bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || + Options.UnsafeFPMath); + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && + TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = ((!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && + UnsafeFPMath); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + bool LookThroughFPExt = TLI.isFPExtFree(VT); + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (N0.getOpcode() == ISD::FMUL && + (Aggressive || N0->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), N1); + } + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FMUL && + (Aggressive || N1->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N1.getOperand(0), N1.getOperand(1), N0); + } + + // Look through FP_EXTEND nodes to do more combining. 
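These contractions change results precisely because FMA rounds once where fmul followed by fadd rounds twice, which is why they sit behind UnsafeFPMath or a target FMAD; std::fma makes the difference observable. A standalone C++ demonstration, assuming a correctly rounded std::fma; the product is kept in a separate statement so the compiler cannot itself contract it:

#include <cassert>
#include <cmath>
#include <cstdio>

int main() {
  const double e = std::ldexp(1.0, -27);       // 2^-27
  const double x = 1.0 + e, y = 1.0 - e, z = -1.0;
  const double contracted = std::fma(x, y, z); // x*y + z, rounded once
  const double p = x * y;                      // rounds away the 2^-54 term
  const double separate = p + z;               // == 0.0
  std::printf("fma: %a  mul+add: %a\n", contracted, separate);
  assert(contracted != separate);              // contraction is observable
  return 0;
}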
+ if (UnsafeFPMath && LookThroughFPExt) { + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), N1); + } + + // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), N0); + } + } + + // More folding opportunities when target permits. + if ((UnsafeFPMath || HasFMAD) && Aggressive) { + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) + if (N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + N1)); + } + + // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) + if (N1->getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N1.getOperand(0), N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + N1.getOperand(2).getOperand(0), + N1.getOperand(2).getOperand(1), + N0)); + } + + if (UnsafeFPMath && LookThroughFPExt) { + // fold (fadd (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + auto FoldFAddFMAFPExtFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (N020.getOpcode() == ISD::FMUL) + return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), + N020.getOperand(0), N020.getOperand(1), + N1); + } + } + + // fold (fadd (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
+ auto FoldFAddFPExtFMAFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, X), + DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (N002.getOpcode() == ISD::FMUL) + return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), + N002.getOperand(0), N002.getOperand(1), + N1); + } + } + + // fold (fadd x, (fma y, z, (fpext (fmul u, v))) + // -> (fma y, z, (fma (fpext u), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode) { + SDValue N12 = N1.getOperand(2); + if (N12.getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N12.getOperand(0); + if (N120.getOpcode() == ISD::FMUL) + return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), + N120.getOperand(0), N120.getOperand(1), + N0); + } + } + + // fold (fadd x, (fpext (fma y, z, (fmul u, v))) + // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == PreferredFusedOpcode) { + SDValue N102 = N10.getOperand(2); + if (N102.getOpcode() == ISD::FMUL) + return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), + N102.getOperand(0), N102.getOperand(1), + N0); + } + } + } + } + + return SDValue(); +} + +/// Try to perform FMA combining on a given FSUB node. +SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + + const TargetOptions &Options = DAG.getTarget().Options; + bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || + Options.UnsafeFPMath); + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && + TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = ((!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && + UnsafeFPMath); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + bool LookThroughFPExt = TLI.isFPExtFree(VT); + + // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) + if (N0.getOpcode() == ISD::FMUL && + (Aggressive || N0->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) + // Note: Commutes FSUB operands. 
+ if (N1.getOpcode() == ISD::FMUL && + (Aggressive || N1->hasOneUse())) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + N1.getOperand(0)), + N1.getOperand(1), N0); + + // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && + N0.getOperand(0).getOpcode() == ISD::FMUL && + (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) { + SDValue N00 = N0.getOperand(0).getOperand(0); + SDValue N01 = N0.getOperand(0).getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N00), N01, + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // Look through FP_EXTEND nodes to do more combining. + if (UnsafeFPMath && LookThroughFPExt) { + // fold (fsub (fpext (fmul x, y)), z) + // -> (fma (fpext x), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // fold (fsub x, (fpext (fmul y, z))) + // -> (fma (fneg (fpext y)), (fpext z), x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), + N0); + } + + // fold (fsub (fpext (fneg (fmul, x, y))), z) + // -> (fneg (fma (fpext x), (fpext y), z)) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FNEG) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); + } + } + } + + // fold (fsub (fneg (fpext (fmul, x, y))), z) + // -> (fneg (fma (fpext x)), (fpext y), z) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FNEG) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FP_EXTEND) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); + } + } + } + + } + + // More folding opportunities when target permits. 
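The FSUB patterns reuse the FADD machinery with negations pushed inward, relying on a*b - c == fma(a, b, -c) and c - a*b == fma(-a, b, c) up to the single final rounding. A standalone spot check with exactly representable values, where both sides agree bit for bit:

#include <cassert>
#include <cmath>

int main() {
  const double x = 3.5, y = -1.25, z = 10.0;   // x*y == -4.375 exactly
  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  assert(std::fma(x, y, -z) == x * y - z);
  // fold (fsub z, (fmul x, y)) -> (fma (fneg x), y, z)
  assert(std::fma(-x, y, z) == z - x * y);
  return 0;
}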
+ if ((UnsafeFPMath || HasFMAD) && Aggressive) { + // fold (fsub (fma x, y, (fmul u, v)), z) + // -> (fma x, y (fma u, v, (fneg z))) + if (N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); + } + + // fold (fsub x, (fma y, z, (fmul u, v))) + // -> (fma (fneg y), z, (fma (fneg u), v, x)) + if (N1.getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + SDValue N20 = N1.getOperand(2).getOperand(0); + SDValue N21 = N1.getOperand(2).getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N20), + + N21, N0)); + } + + if (UnsafeFPMath && LookThroughFPExt) { + // fold (fsub (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (N020.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N020.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N020.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); + } + } + + // fold (fsub (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), + // (fma (fpext u), (fpext v), (fneg z))) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
+ if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (N002.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N002.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N002.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); + } + } + + // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) + // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N1.getOperand(2).getOperand(0); + if (N120.getOpcode() == ISD::FMUL) { + SDValue N1200 = N120.getOperand(0); + SDValue N1201 = N120.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1200)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1201), + N0)); + } + } + + // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) + // -> (fma (fneg (fpext y)), (fpext z), + // (fma (fneg (fpext u)), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND && + N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { + SDValue N100 = N1.getOperand(0).getOperand(0); + SDValue N101 = N1.getOperand(0).getOperand(1); + SDValue N102 = N1.getOperand(0).getOperand(2); + if (N102.getOpcode() == ISD::FMUL) { + SDValue N1020 = N102.getOperand(0); + SDValue N1021 = N102.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N100)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1020)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1021), + N0)); + } + } + } + } + + return SDValue(); } SDValue DAGCombiner::visitFADD(SDNode *N) { @@ -6802,32 +7824,32 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FADD, DL, VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0); + return DAG.getNode(ISD::FADD, DL, VT, N1, N0); // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) - return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, + return DAG.getNode(ISD::FSUB, DL, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // 
fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) - return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1, + return DAG.getNode(ISD::FSUB, DL, VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); // If 'unsafe math' is enabled, fold lots of things. @@ -6843,17 +7865,16 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, SDLoc(N), VT, - N0.getOperand(1), N1)); + return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), + DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1)); // If allowed, fold (fadd (fneg x), x) -> 0.0 if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) - return DAG.getConstantFP(0.0, VT); + return DAG.getConstantFP(0.0, DL, VT); // If allowed, fold (fadd x, (fneg x)) -> 0.0 if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) - return DAG.getConstantFP(0.0, VT); + return DAG.getConstantFP(0.0, DL, VT); // We can fold chains of FADD's of the same value into multiplications. // This transform is not safe in general because we are reducing the number @@ -6865,21 +7886,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP01, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), + DAG.getConstantFP(1.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP); } // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP01, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), + DAG.getConstantFP(2.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP); } } @@ -6889,20 +7907,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP11, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), + DAG.getConstantFP(1.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP); } // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP11, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), + DAG.getConstantFP(2.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP); } } @@ -6910,18 +7926,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP = 
dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP && N0.getOperand(0) == N0.getOperand(1) && - (N0.getOperand(0) == N1)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1, DAG.getConstantFP(3.0, VT)); + (N0.getOperand(0) == N1)) { + return DAG.getNode(ISD::FMUL, DL, VT, + N1, DAG.getConstantFP(3.0, DL, VT)); + } } if (N1.getOpcode() == ISD::FADD && AllowNewConst) { ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0, DAG.getConstantFP(3.0, VT)); + N1.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, DL, VT, + N0, DAG.getConstantFP(3.0, DL, VT)); + } } // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) @@ -6929,81 +7947,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), DAG.getConstantFP(4.0, VT)); + N0.getOperand(0) == N1.getOperand(0)) { + return DAG.getNode(ISD::FMUL, DL, VT, + N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT)); + } } } // enable-unsafe-fp-math // FADD -> FMA combines: - if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { - - // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && - (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), N1); - - // fold (fadd x, (fmul y, z)) -> (fma y, z, x) - // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FMUL && - (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(0), N1.getOperand(1), N0); - - // When FP_EXTEND nodes are free on the target, and there is an opportunity - // to combine into FMA, arrange such nodes accordingly. - if (TLI.isFPExtFree(VT)) { - - // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(1)), N1); - } - - // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x) - // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (N10.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N10.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N10.getOperand(1)), N0); - } - } - - // More folding opportunities when target permits. 
- if (TLI.enableAggressiveFMAFusion(VT)) { - - // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - if (N0.getOpcode() == ISD::FMA && - N0.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(2).getOperand(0), - N0.getOperand(2).getOperand(1), - N1)); - - // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - if (N1->getOpcode() == ISD::FMA && - N1.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(0), N1.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(2).getOperand(0), - N1.getOperand(2).getOperand(1), - N0)); - } + SDValue Fused = visitFADDForFMACombine(N); + if (Fused) { + AddToWorklist(Fused.getNode()); + return Fused; } return SDValue(); @@ -7019,14 +7974,13 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FSUB, dl, VT, N0, N1); // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) @@ -7049,7 +8003,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub x, x) -> 0.0 if (N0 == N1) - return DAG.getConstantFP(0.0f, VT); + return DAG.getConstantFP(0.0f, dl, VT); // (fsub x, (fadd x, y)) -> (fneg y) // (fsub x, (fadd y, x)) -> (fneg y) @@ -7066,138 +8020,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // FSUB -> FMA combines: - if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { - - // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && - (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, dl, VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FNEG, dl, VT, N1)); - - // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) - // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FMUL && - (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, - N1.getOperand(0)), - N1.getOperand(1), N0); - - // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) - if (N0.getOpcode() == ISD::FNEG && - N0.getOperand(0).getOpcode() == ISD::FMUL && - ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) || - TLI.enableAggressiveFMAFusion(VT))) { - SDValue N00 = N0.getOperand(0).getOperand(0); - SDValue N01 = N0.getOperand(0).getOperand(1); - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, N00), N01, - DAG.getNode(ISD::FNEG, dl, VT, N1)); - } - - // When FP_EXTEND nodes are free on the target, and there is an opportunity - // to combine into FMA, arrange such nodes accordingly. 
- if (TLI.isFPExtFree(VT)) { - - // fold (fsub (fpext (fmul x, y)), z) - // -> (fma (fpext x), (fpext y), (fneg z)) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(1)), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1)); - } - - // fold (fsub x, (fpext (fmul y, z))) - // -> (fma (fneg (fpext y)), (fpext z), x) - // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (N10.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), - VT, N10.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N10.getOperand(1)), - N0); - } - - // fold (fsub (fpext (fneg (fmul, x, y))), z) - // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FNEG) { - SDValue N000 = N00.getOperand(0); - if (N000.getOpcode() == ISD::FMUL) { - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), - VT, N000.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N000.getOperand(1)), - DAG.getNode(ISD::FNEG, dl, VT, N1)); - } - } - } - - // fold (fsub (fneg (fpext (fmul, x, y))), z) - // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) - if (N0.getOpcode() == ISD::FNEG) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FP_EXTEND) { - SDValue N000 = N00.getOperand(0); - if (N000.getOpcode() == ISD::FMUL) { - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), - VT, N000.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N000.getOperand(1)), - DAG.getNode(ISD::FNEG, dl, VT, N1)); - } - } - } - } - - // More folding opportunities when target permits. - if (TLI.enableAggressiveFMAFusion(VT)) { - - // fold (fsub (fma x, y, (fmul u, v)), z) - // -> (fma x, y (fma u, v, (fneg z))) - if (N0.getOpcode() == ISD::FMA && - N0.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(2).getOperand(0), - N0.getOperand(2).getOperand(1), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N1))); - - // fold (fsub x, (fma y, z, (fmul u, v))) - // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (N1.getOpcode() == ISD::FMA && - N1.getOperand(2).getOpcode() == ISD::FMUL) { - SDValue N20 = N1.getOperand(2).getOperand(0); - SDValue N21 = N1.getOperand(2).getOperand(1); - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N1.getOperand(0)), - N1.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N20), - N21, N0)); - } - } + SDValue Fused = visitFSUBForFMACombine(N); + if (Fused) { + AddToWorklist(Fused.getNode()); + return Fused; } return SDValue(); @@ -7209,29 +8035,24 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); EVT VT = N->getValueType(0); + SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops if (VT.isVector()) { // This just handles C1 * C2 for vectors. Other vector folds are below. 
- SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - // Canonicalize vector constant to RHS. - if (N0.getOpcode() == ISD::BUILD_VECTOR && - N1.getOpcode() != ISD::BUILD_VECTOR) - if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0)) - if (BV0->isConstant()) - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); } // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N0, N1); // canonicalize constant to RHS - if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0); + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) + return DAG.getNode(ISD::FMUL, DL, VT, N1, N0); // fold (fmul A, 1.0) -> A if (N1CFP && N1CFP->isExactlyValue(1.0)) @@ -7247,14 +8068,22 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // Fold scalars or any vector constants (not just splats). // This fold is done in general by InstCombine, but extra fmul insts // may have been generated during lowering. + SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); + auto *BV00 = dyn_cast<BuildVectorSDNode>(N00); auto *BV01 = dyn_cast<BuildVectorSDNode>(N01); - if ((N1CFP && isConstOrConstSplatFP(N01)) || - (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { - SDLoc SL(N); - SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1); - return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts); + + // Check 1: Make sure that the first operand of the inner multiply is NOT + // a constant. Otherwise, we may induce infinite looping. + if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) { + // Check 2: Make sure that the second operand of the inner multiply and + // the second operand of the outer multiply are constants. + if ((N1CFP && isConstOrConstSplatFP(N01)) || + (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts); + } } } @@ -7263,21 +8092,20 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // during an early run of DAGCombiner can prevent folding with fmuls // inserted during lowering. if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) { - SDLoc SL(N); - const SDValue Two = DAG.getConstantFP(2.0, VT); - SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts); + const SDValue Two = DAG.getConstantFP(2.0, DL, VT); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts); } } // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0); + return DAG.getNode(ISD::FADD, DL, VT, N0, N0); // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); + return DAG.getNode(ISD::FNEG, DL, VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { @@ -7285,7 +8113,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // Both can be negated for free, check to see if at least one is cheaper // negated. 
if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + return DAG.getNode(ISD::FMUL, DL, VT, GetNegatedExpression(N0, DAG, LegalOperations), GetNegatedExpression(N1, DAG, LegalOperations)); } @@ -7364,14 +8192,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (Options.UnsafeFPMath && N1CFP && N0 == N2) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(1.0, VT))); + N1, DAG.getConstantFP(1.0, dl, VT))); // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (Options.UnsafeFPMath && N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(-1.0, VT))); + N1, DAG.getConstantFP(-1.0, dl, VT))); return SDValue(); @@ -7387,10 +8215,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fdiv c1, c2) -> c1/c2 if (N0CFP && N1CFP) @@ -7412,8 +8239,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT))) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, - DAG.getConstantFP(Recip, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getConstantFP(Recip, DL, VT)); } // If this FDIV is part of a reciprocal square root, it may be folded @@ -7492,24 +8319,22 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SmallVector<SDNode *, 4> Users; // Find all FDIV users of the same divisor. - for (SDNode::use_iterator UI = N1.getNode()->use_begin(), - UE = N1.getNode()->use_end(); - UI != UE; ++UI) { - SDNode *User = UI.getUse().getUser(); - if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1) - Users.push_back(User); + for (auto *U : N1->uses()) { + if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) + Users.push_back(U); } if (TLI.combineRepeatedFPDivisors(Users.size())) { - SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0 - SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); + SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1); // Dividend / Divisor -> Dividend * Reciprocal - for (auto I = Users.begin(), E = Users.end(); I != E; ++I) { - if ((*I)->getOperand(0) != FPOne) { - SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT, - (*I)->getOperand(0), Reciprocal); - DAG.ReplaceAllUsesWith(*I, NewNode.getNode()); + for (auto *U : Users) { + SDValue Dividend = U->getOperand(0); + if (Dividend != FPOne) { + SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, + Reciprocal); + DAG.ReplaceAllUsesWith(U, NewNode.getNode()); } } return SDValue(); @@ -7539,20 +8364,21 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { EVT VT = RV.getValueType(); - RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV); + SDLoc DL(N); + RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); AddToWorklist(RV.getNode()); // Unfortunately, RV is now NaN if the input was exactly 0. // Select out this case and force the answer to 0. 
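A quick standalone check, not part of the commit, of the comment above: expanding sqrt(x) as x * (1/sqrt(x)) misbehaves only at x == 0, where the reciprocal square root is +inf and 0 * inf is NaN, which is exactly the case the select built below filters out:

    #include <cassert>
    #include <cmath>

    int main() {
      float X = 0.0f;
      float Rsqrt = 1.0f / std::sqrt(X);      // +inf at X == 0
      float RV = X * Rsqrt;                   // 0 * inf == NaN, not 0
      assert(std::isnan(RV));
      float Fixed = (X == 0.0f) ? 0.0f : RV;  // the SELECT/VSELECT emitted here
      assert(Fixed == 0.0f);
      return 0;
    }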
- SDValue Zero = DAG.getConstantFP(0.0, VT); + SDValue Zero = DAG.getConstantFP(0.0, DL, VT); SDValue ZeroCmp = - DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT), + DAG.getSetCC(DL, TLI.getSetCCResultType(*DAG.getContext(), VT), N->getOperand(0), Zero, ISD::SETEQ); AddToWorklist(ZeroCmp.getNode()); AddToWorklist(RV.getNode()); RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, - SDLoc(N), VT, ZeroCmp, Zero, RV); + DL, VT, ZeroCmp, Zero, RV); return RV; } } @@ -7611,12 +8437,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - if (N0C && + if (isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -7638,11 +8463,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), - DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), + DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } // fold (sint_to_fp (zext (setcc x, y, cc))) -> @@ -7651,11 +8477,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), - DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), + DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(0).getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } } @@ -7664,12 +8491,11 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - if (N0C && + if (isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -7691,39 +8517,82 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), - DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), + DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } } return SDValue(); } +// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x +static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP) + return SDValue(); + + SDValue Src = N0.getOperand(0); + EVT 
SrcVT = Src.getValueType(); + bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP; + bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT; + + // We can safely assume the conversion won't overflow the output range, + // because (for example) (uint8_t)18293.f is undefined behavior. + + // Since we can assume the conversion won't overflow, our decision as to + // whether the input will fit in the float should depend on the minimum + // of the input range and output range. + + // This means this is also safe for a signed input and unsigned output, since + // a negative input would lead to undefined behavior. + unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned; + unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned; + unsigned ActualSize = std::min(InputSize, OutputSize); + const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType()); + + // We can only fold away the float conversion if the input range can be + // represented exactly in the float range. + if (APFloat::semanticsPrecision(sem) >= ActualSize) { + if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) { + unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; + return DAG.getNode(ExtOp, SDLoc(N), VT, Src); + } + if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits()) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src); + if (SrcVT == VT) + return Src; + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src); + } + return SDValue(); +} + SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (fp_to_sint c1fp) -> c1 - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); - return SDValue(); + return FoldIntToFPToInt(N, DAG); } SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (fp_to_uint c1fp) -> c1 - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); - return SDValue(); + return FoldIntToFPToInt(N, DAG); } SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { @@ -7742,11 +8611,18 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // fold (fp_round (fp_round x)) -> (fp_round x) if (N0.getOpcode() == ISD::FP_ROUND) { - // This is a value preserving truncation if both round's are. - bool IsTrunc = N->getConstantOperandVal(1) == 1 && - N0.getNode()->getConstantOperandVal(1) == 1; - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), - DAG.getIntPtrConstant(IsTrunc)); + const bool NIsTrunc = N->getConstantOperandVal(1) == 1; + const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1; + // If the first fp_round isn't a value preserving truncation, it might + // introduce a tie in the second fp_round, that wouldn't occur in the + // single-step fp_round we want to fold to. + // In other words, double rounding isn't the same as rounding. + // Also, this is a value preserving truncation iff both fp_round's are. 
+ if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) { + SDLoc DL(N); + return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), + DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL)); + } } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) @@ -7769,8 +8645,9 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { // fold (fp_round_inreg c1fp) -> c1fp if (N0CFP && isTypeLegal(EVT)) { - SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); - return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round); + SDLoc DL(N); + SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); + return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); } return SDValue(); @@ -7778,7 +8655,6 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. @@ -7787,9 +8663,14 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(); // fold (fp_extend c1fp) -> c1fp - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); + // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op) + if (N0.getOpcode() == ISD::FP16_TO_FP && + TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal) + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0)); + // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the // value of X. if (N0.getOpcode() == ISD::FP_ROUND @@ -7813,7 +8694,8 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::FP_ROUND, SDLoc(N0), - N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), + N0.getValueType(), ExtLoad, + DAG.getIntPtrConstant(1, SDLoc(N0))), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -7823,11 +8705,10 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue DAGCombiner::visitFCEIL(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (fceil c1) -> fceil(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); return SDValue(); @@ -7835,11 +8716,10 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) { SDValue DAGCombiner::visitFTRUNC(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (ftrunc c1) -> ftrunc(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); return SDValue(); @@ -7847,11 +8727,10 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { SDValue DAGCombiner::visitFFLOOR(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (ffloor c1) -> ffloor(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); return SDValue(); @@ -7862,14 +8741,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVUnaryOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } - // Constant fold FNEG. 
- if (isa<ConstantFPSDNode>(N0)) - return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0)); + if (isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options)) @@ -7893,8 +8767,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // For a scalar, just generate 0x80... SignMask = APInt::getSignBit(IntVT.getSizeInBits()); } - Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int, - DAG.getConstant(SignMask, IntVT)); + SDLoc DL0(N0); + Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, + DAG.getConstant(SignMask, DL0, IntVT)); AddToWorklist(Int.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); } @@ -7927,7 +8802,7 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) { if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0)); + return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0)); } if (N0CFP) { @@ -7948,7 +8823,7 @@ SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0)); + return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0)); } if (N0CFP) { @@ -7964,13 +8839,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVUnaryOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } - // fold (fabs c1) -> fabs(c1) - if (isa<ConstantFPSDNode>(N0)) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // fold (fabs (fabs x)) -> (fabs x) @@ -8000,8 +8870,9 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // For a scalar, just generate 0x7f... SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); } - Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, - DAG.getConstant(SignMask, IntVT)); + SDLoc DL(N0); + Int = DAG.getNode(ISD::AND, DL, IntVT, Int, + DAG.getConstant(SignMask, DL, IntVT)); AddToWorklist(Int.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); } @@ -8071,13 +8942,14 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (AndConst.isPowerOf2() && cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { + SDLoc DL(N); SDValue SetCC = - DAG.getSetCC(SDLoc(N), + DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()), - Op0, DAG.getConstant(0, Op0.getValueType()), + Op0, DAG.getConstant(0, DL, Op0.getValueType()), ISD::SETNE); - SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N), + SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, Chain, SetCC, N2); // Don't add the new BRCond into the worklist or else SimplifySelectCC // will convert it back to (X & C1) >> C2. 
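The FNEG and FABS hunks above both use the sign-bit trick on the integer image of the float: XOR with the sign bit (APInt::getSignBit) negates, AND with its complement takes the absolute value. A minimal sketch of the same idea in plain C++, with the f32 constants written out by hand, illustrative only:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint32_t asBits(float F) { uint32_t I; std::memcpy(&I, &F, 4); return I; }
    static float asFloat(uint32_t I) { float F; std::memcpy(&F, &I, 4); return F; }

    int main() {
      float X = -3.5f;
      float Neg = asFloat(asBits(X) ^ UINT32_C(0x80000000)); // FNEG as XOR
      float Abs = asFloat(asBits(X) & UINT32_C(0x7FFFFFFF)); // FABS as AND
      assert(Neg == 3.5f && Abs == 3.5f);
      return 0;
    }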
@@ -8130,12 +9002,11 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { bool Equal = false; - if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0)) - if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() && - Op0.getOpcode() == ISD::XOR) { - TheXor = Op0.getNode(); - Equal = true; - } + if (isOneConstant(Op0) && Op0.hasOneUse() && + Op0.getOpcode() == ISD::XOR) { + TheXor = Op0.getNode(); + Equal = true; + } EVT SetCCVT = N1.getValueType(); if (LegalTypes) @@ -8193,11 +9064,11 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; - VT = Use->getValueType(0); + VT = LD->getMemoryVT(); } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; - VT = ST->getValue().getValueType(); + VT = ST->getMemoryVT(); } else return false; @@ -8280,8 +9151,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { } // Don't create a indexed load / store with zero offset. - if (isa<ConstantSDNode>(Offset) && - cast<ConstantSDNode>(Offset)->isNullValue()) + if (isNullConstant(Offset)) return false; // Try turning it into a pre-indexed load / store except when: @@ -8309,24 +9179,25 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // a copy of the original base pointer. SmallVector<SDNode *, 16> OtherUses; if (isa<ConstantSDNode>(Offset)) - for (SDNode *Use : BasePtr.getNode()->uses()) { - if (Use == Ptr.getNode()) + for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(), + UE = BasePtr.getNode()->use_end(); + UI != UE; ++UI) { + SDUse &Use = UI.getUse(); + // Skip the use that is Ptr and uses of other results from BasePtr's + // node (important for nodes that return multiple results). + if (Use.getUser() == Ptr.getNode() || Use != BasePtr) continue; - if (Use->isPredecessorOf(N)) + if (Use.getUser()->isPredecessorOf(N)) continue; - if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) { + if (Use.getUser()->getOpcode() != ISD::ADD && + Use.getUser()->getOpcode() != ISD::SUB) { OtherUses.clear(); break; } - SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1); - if (Op1.getNode() == BasePtr.getNode()) - std::swap(Op0, Op1); - assert(Op0.getNode() == BasePtr.getNode() && - "Use of ADD/SUB but not an operand"); - + SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1); if (!isa<ConstantSDNode>(Op1)) { OtherUses.clear(); break; @@ -8338,7 +9209,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { break; } - OtherUses.push_back(Use); + OtherUses.push_back(Use.getUser()); } if (Swapped) @@ -8431,12 +9302,14 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1; else CNV = CNV - Offset1; + SDLoc DL(OtherUses[i]); + // We can now generate the new expression. - SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0)); + SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0)); SDValue NewOp2 = Result.getValue(isLoad ? 
1 : 0); SDValue NewUse = DAG.getNode(Opcode, - SDLoc(OtherUses[i]), + DL, OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); deleteAndRecombine(OtherUses[i]); @@ -8494,8 +9367,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { ISD::MemIndexedMode AM = ISD::UNINDEXED; if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { // Don't create a indexed load / store with zero offset. - if (isa<ConstantSDNode>(Offset) && - cast<ConstantSDNode>(Offset)->isNullValue()) + if (isNullConstant(Offset)) continue; // Try turning it into a post-indexed load / store except when @@ -8585,7 +9457,7 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { "Cannot split out indexing using opaque target constants"); if (Inc.getOpcode() == ISD::TargetConstant) { ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc); - Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), + Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc), ConstInc->getValueType(0)); } @@ -8686,7 +9558,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), Align, LD->getAAInfo()); - return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); + if (NewLoad.getNode() != N) + return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } @@ -8851,9 +9724,6 @@ struct LoadedSlice { unsigned Shift = 0, SelectionDAG *DAG = nullptr) : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} - LoadedSlice(const LoadedSlice &LS) - : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {} - /// \brief Get the bits used in a chunk of bits \p BitWidth large. /// \return Result is \p BitWidth and has used bits set to 1 and /// not used bits set to 0. @@ -8980,8 +9850,9 @@ struct LoadedSlice { if (Offset) { // BaseAddr = BaseAddr + Offset. EVT ArithType = BaseAddr.getValueType(); - BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr, - DAG->getConstant(Offset, ArithType)); + SDLoc DL(Origin); + BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr, + DAG->getConstant(Offset, DL, ArithType)); } // Create the type of the loaded slice according to its size. @@ -9336,7 +10207,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { if (NotMaskLZ == 64) return Result; // All zero mask. // See if we have a continuous run of bits. If so, we have 0*1+0* - if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) + if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64) return Result; // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. @@ -9387,10 +10258,12 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. - if (ByteShift) - IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal, - DAG.getConstant(ByteShift*8, + if (ByteShift) { + SDLoc DL(IVal); + IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal, + DAG.getConstant(ByteShift*8, DL, DC->getShiftAmountTy(IVal.getValueType()))); + } // Figure out the offset for the store and the alignment of the access. 
unsigned StOffset; @@ -9403,8 +10276,9 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue Ptr = St->getBasePtr(); if (StOffset) { - Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(), - Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); + SDLoc DL(IVal); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), + Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType())); NewAlign = MinAlign(NewAlign, StOffset); } @@ -9486,8 +10360,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { // The narrowing should be profitable, the load/store operation should be // legal (or custom) and the store size should be equal to the NewVT width. while (NewBW < BitWidth && - !(TLI.isOperationLegalOrCustom(Opc, NewVT) && - TLI.isNarrowingProfitable(VT, NewVT))) { + (NewVT.getStoreSizeInBits() != NewBW || + !TLI.isOperationLegalOrCustom(Opc, NewVT) || + !TLI.isNarrowingProfitable(VT, NewVT))) { NewBW = NextPowerOf2(NewBW); NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); } @@ -9517,7 +10392,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), Ptr.getValueType(), Ptr, - DAG.getConstant(PtrOff, Ptr.getValueType())); + DAG.getConstant(PtrOff, SDLoc(LD), + Ptr.getValueType())); SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), @@ -9525,7 +10401,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { LD->isInvariant(), NewAlign, LD->getAAInfo()); SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, - DAG.getConstant(NewImm, NewVT)); + DAG.getConstant(NewImm, SDLoc(Value), + NewVT)); SDValue NewST = DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, ST->getPointerInfo().getWithOffset(PtrOff), @@ -9599,6 +10476,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } +namespace { /// Helper struct to parse and store a memory address as base + index + offset. /// We ignore sign extensions when it is safe to do so. /// The following two expressions are not equivalent. To differentiate we need @@ -9686,37 +10564,156 @@ struct BaseIndexOffset { return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); } }; +} // namespace -/// Holds a pointer to an LSBaseSDNode as well as information on where it -/// is located in a sequence of memory operations connected by a chain. -struct MemOpLink { - MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): - MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } - // Ptr to the mem node. - LSBaseSDNode *MemNode; - // Offset from the base ptr. - int64_t OffsetFromBase; - // What is the sequence number of this mem node. - // Lowest mem operand in the DAG starts at zero. - unsigned SequenceNum; -}; +bool DAGCombiner::MergeStoresOfConstantsOrVecElts( + SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, + unsigned NumElem, bool IsConstantSrc, bool UseVector) { + // Make sure we have something to merge. + if (NumElem < 2) + return false; + + int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned LatestNodeUsed = 0; + + for (unsigned i=0; i < NumElem; ++i) { + // Find a chain for the new wide-store operand. Notice that some + // of the store nodes that we found may not be selected for inclusion + // in the wide store. The chain we use needs to be the chain of the + // latest store node which is *used* and replaced by the wide store. 
+ if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; + } + + // The latest Node in the DAG. + LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; + SDLoc DL(StoreNodes[0].MemNode); + + SDValue StoredVal; + if (UseVector) { + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); + if (IsConstantSrc) { + // A vector store with a constant source implies that the constant is + // zero; we only handle merging stores of constant zeros because the zero + // can be materialized without a load. + // It may be beneficial to loosen this restriction to allow non-zero + // store merging. + StoredVal = DAG.getConstant(0, DL, Ty); + } else { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < NumElem ; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue Val = St->getValue(); + // All of the operands of a BUILD_VECTOR must have the same type. + if (Val.getValueType() != MemVT) + return false; + Ops.push_back(Val); + } + + // Build the extracted vector elements back into a vector. + StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops); + } + } else { + // We should always use a vector store when merging extracted vector + // elements, so this path implies a store of constants. + assert(IsConstantSrc && "Merged vector elements should use vector store"); + + unsigned StoreBW = NumElem * ElementSizeBytes * 8; + APInt StoreInt(StoreBW, 0); + + // Construct a single integer constant which is made of the smaller + // constant inputs. + bool IsLE = TLI.isLittleEndian(); + for (unsigned i = 0; i < NumElem ; ++i) { + unsigned Idx = IsLE ? (NumElem - 1 - i) : i; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); + SDValue Val = St->getValue(); + StoreInt <<= ElementSizeBytes*8; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { + StoreInt |= C->getAPIntValue().zext(StoreBW); + } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { + StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW); + } else { + llvm_unreachable("Invalid constant element type"); + } + } + + // Create the new Load and Store operations. + EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); + } + + SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + false, false, + FirstInChain->getAlignment()); + + // Replace the last store with the new store + CombineTo(LatestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumElem ; ++i) { + if (StoreNodes[i].MemNode == LatestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + // ReplaceAllUsesWith will replace all uses that existed when it was + // called, but graph optimizations may cause new ones to appear. For + // example, the case in pr14333 looks like + // + // St's chain -> St -> another store -> X + // + // And the only difference from St to the other store is the chain. + // When we change its chain to be St's chain they become identical, + // get CSEed and the net result is that X is now a use of St. + // Since we know that St is redundant, just iterate. 
+ while (!St->use_empty()) + DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); + deleteAndRecombine(St); + } + + return true; +} + +static bool allowableAlignment(const SelectionDAG &DAG, + const TargetLowering &TLI, EVT EVTTy, + unsigned AS, unsigned Align) { + if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align)) + return true; + + Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getDataLayout()->getPrefTypeAlignment(Ty); + return (Align >= ABIAlignment); +} bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { + if (OptLevel == CodeGenOpt::None) + return false; + EVT MemVT = St->getMemoryVT(); int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; - bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); + bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( + Attribute::NoImplicitFloat); + + // This function cannot currently deal with non-byte-sized memory sizes. + if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) + return false; // Don't merge vectors into wider inputs. if (MemVT.isVector() || !MemVT.isSimple()) return false; // Perform an early exit check. Do not bother looking at stored values that - // are not constants or loads. + // are not constants, loads, or extracted vector elements. SDValue StoredVal = St->getValue(); bool IsLoadSrc = isa<LoadSDNode>(StoredVal); - if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) && - !IsLoadSrc) + bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || + isa<ConstantFPSDNode>(StoredVal); + bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); + + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) return false; // Only look at ends of store sequences. @@ -9761,10 +10758,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (!Ptr.equalBaseIndex(BasePtr)) break; - // Check that the alignment is the same. - if (Index->getAlignment() != St->getAlignment()) - break; - // The memory operands must not be volatile. if (Index->isVolatile() || Index->isIndexed()) break; @@ -9778,11 +10771,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Index->getMemoryVT() != MemVT) break; - // We do not allow unaligned stores because we want to prevent overriding - // stores. - if (Index->getAlignment()*8 != MemVT.getSizeInBits()) - break; - // We found a potential memory operand to merge. StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); @@ -9856,9 +10844,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // The node with the lowest store address. LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); // Store the constants into memory as one consecutive store. - if (!IsLoadSrc) { + if (IsConstantSrc) { unsigned LastLegalType = 0; unsigned LastLegalVectorType = 0; bool NonZero = false; @@ -9878,27 +10868,41 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find a legal type for the constant store. unsigned StoreBW = (i+1) * ElementSizeBytes * 8; EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); - if (TLI.isTypeLegal(StoreTy)) + if (TLI.isTypeLegal(StoreTy) && + allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, + FirstStoreAlign)) { LastLegalType = i+1; // Or check whether a truncstore is legal. 
- else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == - TargetLowering::TypePromoteInteger) { + } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); - if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy)) - LastLegalType = i+1; + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, + FirstStoreAlign)) { + LastLegalType = i + 1; + } } // Find a legal type for the vector store. EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); - if (TLI.isTypeLegal(Ty)) + if (TLI.isTypeLegal(Ty) && + allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) { LastLegalVectorType = i + 1; + } } - // We only use vectors if the constant is known to be zero and the - // function is not marked with the noimplicitfloat attribute. - if (NonZero || NoVectors) + + // We only use vectors if the constant is known to be zero or the target + // allows it and the function is not marked with the noimplicitfloat + // attribute. + if (NoVectors) { LastLegalVectorType = 0; + } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT, + LastLegalVectorType, + FirstStoreAS)) { + LastLegalVectorType = 0; + } // Check if we found a legal integer type to store. if (LastLegalType == 0 && LastLegalVectorType == 0) @@ -9907,85 +10911,34 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; - // Make sure we have something to merge. - if (NumElem < 2) - return false; - - unsigned EarliestNodeUsed = 0; - for (unsigned i=0; i < NumElem; ++i) { - // Find a chain for the new wide-store operand. Notice that some - // of the store nodes that we found may not be selected for inclusion - // in the wide store. The chain we use needs to be the chain of the - // earliest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; - } + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + true, UseVector); + } - // The earliest Node in the DAG. - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; - SDLoc DL(StoreNodes[0].MemNode); + // When extracting multiple vector elements, try to store them + // in one vector store rather than a sequence of scalar stores. + if (IsExtractVecEltSrc) { + unsigned NumElem = 0; + for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue StoredVal = St->getValue(); + // This restriction could be loosened. + // Bail out if any stored values are not elements extracted from a vector. + // It should be possible to handle mixed sources, but load sources need + // more careful handling (see the block of code below that handles + // consecutive loads). + if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return false; - SDValue StoredVal; - if (UseVector) { // Find a legal type for the vector store. 
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); - assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); - StoredVal = DAG.getConstant(0, Ty); - } else { - unsigned StoreBW = NumElem * ElementSizeBytes * 8; - APInt StoreInt(StoreBW, 0); - - // Construct a single integer constant which is made of the smaller - // constant inputs. - bool IsLE = TLI.isLittleEndian(); - for (unsigned i = 0; i < NumElem ; ++i) { - unsigned Idx = IsLE ?(NumElem - 1 - i) : i; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); - SDValue Val = St->getValue(); - StoreInt<<=ElementSizeBytes*8; - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { - StoreInt|=C->getAPIntValue().zext(StoreBW); - } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { - StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); - } else { - llvm_unreachable("Invalid constant element type"); - } - } - - // Create the new Load and Store operations. - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); - StoredVal = DAG.getConstant(StoreInt, StoreTy); - } - - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), - false, false, - FirstInChain->getAlignment()); - - // Replace the first store with the new store - CombineTo(EarliestOp, NewStore); - // Erase all other stores. - for (unsigned i = 0; i < NumElem ; ++i) { - if (StoreNodes[i].MemNode == EarliestOp) - continue; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - // ReplaceAllUsesWith will replace all uses that existed when it was - // called, but graph optimizations may cause new ones to appear. For - // example, the case in pr14333 looks like - // - // St's chain -> St -> another store -> X - // - // And the only difference from St to the other store is the chain. - // When we change it's chain to be St's chain they become identical, - // get CSEed and the net result is that X is now a use of St. - // Since we know that St is redundant, just iterate. - while (!St->use_empty()) - DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); - deleteAndRecombine(St); + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + if (TLI.isTypeLegal(Ty) && + allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) + NumElem = i + 1; } - return true; + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + false, true); } // Below we handle the case of multiple consecutive stores that @@ -10007,10 +10960,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (!Ld->hasNUsesOfValue(1, 0)) break; - // Check that the alignment is the same as the stores. - if (Ld->getAlignment() != St->getAlignment()) - break; - // The memory operands must not be volatile. if (Ld->isVolatile() || Ld->isIndexed()) break; @@ -10048,6 +10997,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { St->getAlignment() >= RequiredAlignment) return false; + LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); + unsigned FirstLoadAS = FirstLoad->getAddressSpace(); + unsigned FirstLoadAlign = FirstLoad->getAlignment(); + // Scan the memory operations on the chain and find the first non-consecutive // load memory address. These variables hold the index in the store node // array. 
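The loop removed above (its logic now lives in the shared MergeStoresOfConstantsOrVecElts helper) builds one wide constant by shifting each element's bits in, walking the elements in reverse on little-endian targets so the highest-addressed store lands in the most significant bits. A freestanding sketch of that packing, assuming 8-bit elements:

    #include <cstdint>
    #include <vector>

    // Pack consecutive byte-store constants into one wide integer,
    // mirroring the StoreInt loop deleted above.
    uint64_t packStoredConstants(const std::vector<uint8_t> &Elems,
                                 bool IsLittleEndian) {
      uint64_t Merged = 0;
      const unsigned N = Elems.size();
      for (unsigned i = 0; i < N; ++i) {
        unsigned Idx = IsLittleEndian ? (N - 1 - i) : i;
        Merged = (Merged << 8) | Elems[Idx];
      }
      return Merged;
    }
    // e.g. byte stores of {0x11, 0x22, 0x33, 0x44} to increasing addresses
    // merge into a single little-endian i32 store of 0x44332211.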
@@ -10056,7 +11009,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { unsigned LastLegalVectorType = 0; unsigned LastLegalIntegerType = 0; StartAddress = LoadNodes[0].OffsetFromBase; - SDValue FirstChain = LoadNodes[0].MemNode->getChain(); + SDValue FirstChain = FirstLoad->getChain(); for (unsigned i = 1; i < LoadNodes.size(); ++i) { // All loads much share the same chain. if (LoadNodes[i].MemNode->getChain() != FirstChain) @@ -10069,13 +11022,18 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find a legal type for the vector store. EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); - if (TLI.isTypeLegal(StoreTy)) + if (TLI.isTypeLegal(StoreTy) && + allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && + allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) { LastLegalVectorType = i + 1; + } // Find a legal type for the integer store. unsigned StoreBW = (i+1) * ElementSizeBytes * 8; StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); - if (TLI.isTypeLegal(StoreTy)) + if (TLI.isTypeLegal(StoreTy) && + allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && + allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) LastLegalIntegerType = i + 1; // Or check whether a truncstore and extload is legal. else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == @@ -10085,7 +11043,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy)) + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && + allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, + FirstStoreAlign) && + allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS, + FirstLoadAlign)) LastLegalIntegerType = i+1; } } @@ -10103,18 +11065,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (NumElem < 2) return false; - // The earliest Node in the DAG. - unsigned EarliestNodeUsed = 0; - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + // The latest Node in the DAG. + unsigned LatestNodeUsed = 0; for (unsigned i=1; i<NumElem; ++i) { // Find a chain for the new wide-store operand. Notice that some // of the store nodes that we found may not be selected for inclusion // in the wide store. The chain we use needs to be the chain of the - // earliest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; + // latest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; } + LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; + // Find if it is better to use vectors or integers to load and store // to memory. 
  EVT JointMemOpVT;
@@ -10128,18 +11091,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
   SDLoc LoadDL(LoadNodes[0].MemNode);
   SDLoc StoreDL(StoreNodes[0].MemNode);
 
-  LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
-  SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
-                                FirstLoad->getChain(),
-                                FirstLoad->getBasePtr(),
-                                FirstLoad->getPointerInfo(),
-                                false, false, false,
-                                FirstLoad->getAlignment());
-
-  SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
-                                  FirstInChain->getBasePtr(),
-                                  FirstInChain->getPointerInfo(), false, false,
-                                  FirstInChain->getAlignment());
+  SDValue NewLoad = DAG.getLoad(
+      JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
+      FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
+
+  SDValue NewStore = DAG.getStore(
+      LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(),
+      FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
 
   // Replace one of the loads with the new load.
   LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
@@ -10154,12 +11112,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
     DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
   }
 
-  // Replace the first store with the new store.
-  CombineTo(EarliestOp, NewStore);
+  // Replace the last store with the new store.
+  CombineTo(LatestOp, NewStore);
 
   // Erase all other stores.
   for (unsigned i = 0; i < NumElem ; ++i) {
     // Remove all Store nodes.
-    if (StoreNodes[i].MemNode == EarliestOp)
+    if (StoreNodes[i].MemNode == LatestOp)
       continue;
     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
     DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
@@ -10214,8 +11172,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     case MVT::f32:
       if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
           TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
         Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
-                              bitcastToAPInt().getZExtValue(), MVT::i32);
+                              bitcastToAPInt().getZExtValue(), SDLoc(CFP),
+                              MVT::i32);
         return DAG.getStore(Chain, SDLoc(N), Tmp,
                             Ptr, ST->getMemOperand());
       }
@@ -10224,8 +11184,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
            !ST->isVolatile()) ||
           TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
         Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
-                              getZExtValue(), MVT::i64);
+                              getZExtValue(), SDLoc(CFP), MVT::i64);
         return DAG.getStore(Chain, SDLoc(N), Tmp,
                             Ptr, ST->getMemOperand());
       }
@@ -10236,8 +11197,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
           // argument passing.  Since this is so common, custom legalize the
           // 64-bit integer store into two 32-bit stores.
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); - SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); if (TLI.isBigEndian()) std::swap(Lo, Hi); unsigned Alignment = ST->getAlignment(); @@ -10245,18 +11206,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { bool isNonTemporal = ST->isNonTemporal(); AAMDNodes AAInfo = ST->getAAInfo(); + SDLoc DL(N); + SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, ST->getAlignment(), AAInfo); - Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr, - DAG.getConstant(4, Ptr.getValueType())); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4, DL, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, Alignment, AAInfo); - return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, St0, St1); } @@ -10268,11 +11231,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > ST->getAlignment()) - return DAG.getTruncStore(Chain, SDLoc(N), Value, + if (Align > ST->getAlignment()) { + SDValue NewStore = + DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), Align, ST->getAAInfo()); + if (NewStore.getNode() != N) + return CombineTo(ST, NewStore, true); + } } } @@ -10493,24 +11460,20 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( SDValue Offset; EVT PtrType = NewPtr.getValueType(); MachinePointerInfo MPI; + SDLoc DL(EVE); if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) { int Elt = ConstEltNo->getZExtValue(); unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; - if (TLI.isBigEndian()) - PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff; - Offset = DAG.getConstant(PtrOff, PtrType); + Offset = DAG.getConstant(PtrOff, DL, PtrType); MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); } else { + Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType); Offset = DAG.getNode( - ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo, - DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType())); - if (TLI.isBigEndian()) - Offset = DAG.getNode( - ISD::SUB, SDLoc(EVE), EltNo.getValueType(), - DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset); + ISD::MUL, DL, PtrType, Offset, + DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType)); MPI = OriginalLoad->getPointerInfo(); } - NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset); + NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset); // The replacement we need to do here is a little tricky: we need to // replace an extractelement of a load with a load. 
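A few hunks up, visitSTORE splits a 64-bit FP constant store into two 32-bit stores at Ptr and Ptr+4, swapping the halves on big-endian targets. A freestanding sketch of the same transformation on plain memory (illustrative only; the real code emits SDNodes and shrinks the second store's alignment with MinAlign):

    #include <cstdint>
    #include <cstring>
    #include <utility>

    void storeF64AsTwoI32(double V, uint32_t *Dst, bool IsBigEndian) {
      uint64_t Bits;
      std::memcpy(&Bits, &V, sizeof Bits);        // bitcastToAPInt()
      uint32_t Lo = static_cast<uint32_t>(Bits);  // Val & 0xFFFFFFFF
      uint32_t Hi = static_cast<uint32_t>(Bits >> 32);
      if (IsBigEndian)
        std::swap(Lo, Hi);
      Dst[0] = Lo;  // store at Ptr
      Dst[1] = Hi;  // store at Ptr + 4
    }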
@@ -10620,8 +11583,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (!LegalOperations) { EVT IndexTy = TLI.getVectorIdxTy(); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, - SVInVec, DAG.getConstant(OrigElt, IndexTy)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, + DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); } } @@ -10710,7 +11673,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; - EltNo = DAG.getConstant(Elt, EltNo.getValueType()); + EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType()); } } @@ -10800,7 +11763,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): - DAG.getConstant(0, SourceType); + DAG.getConstant(0, SDLoc(N), SourceType); unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); SmallVector<SDValue, 8> Ops(NewBVElems, Filler); @@ -10890,6 +11853,11 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { if (!TLI.isOperationLegalOrCustom(Opcode, NVT)) return SDValue(); + // Just because the floating-point vector type is legal does not necessarily + // mean that the corresponding integer vector type is. + if (!isTypeLegal(NVT)) + return SDValue(); + SmallVector<SDValue, 8> Opnds; for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); @@ -10914,12 +11882,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); - SDValue V = reduceBuildVecExtToExtBuildVec(N); - if (V.getNode()) + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; - V = reduceBuildVecConvertToConvertBuildVec(N); - if (V.getNode()) + if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) return V; // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT @@ -10942,8 +11908,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (Op.getOpcode() == ISD::UNDEF) continue; // See if we can combine this build_vector into a blend with a zero vector. - if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Op.getNode())->isNullValue()) || + if (!VecIn2.getNode() && (isNullConstant(Op) || (Op.getOpcode() == ISD::ConstantFP && cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) { UsesZeroVector = true; @@ -11047,20 +12012,20 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) return SDValue(); - + // Try to replace VecIn1 with two extract_subvectors // No need to update the masks, they should still be correct. - VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy())); + VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(VT.getVectorNumElements(), dl, TLI.getVectorIdxTy())); VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); } else return SDValue(); } if (UsesZeroVector) - VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) : - DAG.getConstantFP(0.0, VT); + VecIn2 = VT.isInteger() ? 
DAG.getConstant(0, dl, VT) : + DAG.getConstantFP(0.0, dl, VT); else // If VecIn2 is unused then change it to undef. VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); @@ -11081,6 +12046,68 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { return SDValue(); } +static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT OpVT = N->getOperand(0).getValueType(); + + // If the operands are legal vectors, leave them alone. + if (TLI.isTypeLegal(OpVT)) + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + SmallVector<SDValue, 8> Ops; + + EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + + // Keep track of what we encounter. + bool AnyInteger = false; + bool AnyFP = false; + for (const SDValue &Op : N->ops()) { + if (ISD::BITCAST == Op.getOpcode() && + !Op.getOperand(0).getValueType().isVector()) + Ops.push_back(Op.getOperand(0)); + else if (ISD::UNDEF == Op.getOpcode()) + Ops.push_back(ScalarUndef); + else + return SDValue(); + + // Note whether we encounter an integer or floating point scalar. + // If it's neither, bail out, it could be something weird like x86mmx. + EVT LastOpVT = Ops.back().getValueType(); + if (LastOpVT.isFloatingPoint()) + AnyFP = true; + else if (LastOpVT.isInteger()) + AnyInteger = true; + else + return SDValue(); + } + + // If any of the operands is a floating point scalar bitcast to a vector, + // use floating point types throughout, and bitcast everything. + // Replace UNDEFs by another scalar UNDEF node, of the final desired type. + if (AnyFP) { + SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); + ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + if (AnyInteger) { + for (SDValue &Op : Ops) { + if (Op.getValueType() == SVT) + continue; + if (Op.getOpcode() == ISD::UNDEF) + Op = ScalarUndef; + else + Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op); + } + } + } + + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, + VT.getSizeInBits() / SVT.getSizeInBits()); + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); +} + SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector @@ -11096,9 +12123,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); - // Optimize concat_vectors where one of the vectors is undef. - if (N->getNumOperands() == 2 && - N->getOperand(1)->getOpcode() == ISD::UNDEF) { + // Optimize concat_vectors where all but the first of the vectors are undef. 
+ if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { + return Op.getOpcode() == ISD::UNDEF; + })) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); @@ -11106,6 +12134,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (In->getOpcode() == ISD::BITCAST && !In->getOperand(0)->getValueType(0).isVector()) { SDValue Scalar = In->getOperand(0); + + // If the bitcast type isn't legal, it might be a trunc of a legal type; + // look through the trunc so we can still do the transform: + // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) + if (Scalar->getOpcode() == ISD::TRUNCATE && + !TLI.isTypeLegal(Scalar.getValueType()) && + TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) + Scalar = Scalar->getOperand(0); + EVT SclTy = Scalar->getValueType(0); if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) @@ -11122,39 +12159,61 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } } + // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR. + // We have already tested above for an UNDEF only concatenation. // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) // -> (BUILD_VECTOR A, B, ..., C, D, ...) - if (N->getNumOperands() == 2 && - N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR && - N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) { - EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); + auto IsBuildVectorOrUndef = [](const SDValue &Op) { + return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); + }; + bool AllBuildVectorsOrUndefs = + std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef); + if (AllBuildVectorsOrUndefs) { SmallVector<SDValue, 8> Opnds; - unsigned BuildVecNumElts = N0.getNumOperands(); - - EVT SclTy0 = N0.getOperand(0)->getValueType(0); - EVT SclTy1 = N1.getOperand(0)->getValueType(0); - if (SclTy0.isFloatingPoint()) { - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N0.getOperand(i)); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N1.getOperand(i)); - } else { + EVT SVT = VT.getScalarType(); + + EVT MinVT = SVT; + if (!SVT.isFloatingPoint()) { // If BUILD_VECTOR are from built from integer, they may have different - // operand types. Get the smaller type and truncate all operands to it. - EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1; - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, - N0.getOperand(i))); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, - N1.getOperand(i))); + // operand types. Get the smallest type and truncate all operands to it. + bool FoundMinVT = false; + for (const SDValue &Op : N->ops()) + if (ISD::BUILD_VECTOR == Op.getOpcode()) { + EVT OpSVT = Op.getOperand(0)->getValueType(0); + MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? 
OpSVT : MinVT; + FoundMinVT = true; + } + assert(FoundMinVT && "Concat vector type mismatch"); } + for (const SDValue &Op : N->ops()) { + EVT OpVT = Op.getValueType(); + unsigned NumElts = OpVT.getVectorNumElements(); + + if (ISD::UNDEF == Op.getOpcode()) + Opnds.append(NumElts, DAG.getUNDEF(MinVT)); + + if (ISD::BUILD_VECTOR == Op.getOpcode()) { + if (SVT.isFloatingPoint()) { + assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); + Opnds.append(Op->op_begin(), Op->op_begin() + NumElts); + } else { + for (unsigned i = 0; i != NumElts; ++i) + Opnds.push_back( + DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i))); + } + } + } + + assert(VT.getVectorNumElements() == Opnds.size() && + "Concat vector type mismatch"); return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } + // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. + if (SDValue V = combineConcatVectorOfScalars(N, DAG)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -11216,7 +12275,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // type. if (V->getOperand(0).getValueType() != NVT) return SDValue(); - unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned Idx = N->getConstantOperandVal(1); unsigned NumElems = NVT.getVectorNumElements(); assert((Idx % NumElems) == 0 && "IDX in concat is not a multiple of the result vector length."); @@ -11347,7 +12406,8 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); } -// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat. +// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, +// or turn a shuffle of a single concat into simpler shuffle then concat. static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -11361,6 +12421,18 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); unsigned NumConcats = NumElts / NumElemsPerConcat; + // Special case: shuffle(concat(A,B)) can be more efficiently represented + // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high + // half vector elements. + if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF && + std::all_of(SVN->getMask().begin() + NumElemsPerConcat, + SVN->getMask().end(), [](int i) { return i == -1; })) { + N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), + ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat)); + N1 = DAG.getUNDEF(ConcatVT); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); + } + // Look at every vector that's inserted. We're looking for exact // subvector-sized copies from a concatenated vector for (unsigned I = 0; I != NumConcats; ++I) { @@ -11459,7 +12531,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // If it is a splat, check if the argument vector is another splat or a - // build_vector with all scalar elements the same. + // build_vector. 
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { SDNode *V = N0.getNode(); @@ -11496,6 +12568,18 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Splat of <x, x, x, x>, return <x, x, x, x> if (AllSame) return N0; + + // Canonicalize any other splat as a build_vector. + const SDValue &Splatted = V->getOperand(SVN->getSplatIndex()); + SmallVector<SDValue, 8> Ops(NumElts, Splatted); + SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), + V->getValueType(0), Ops); + + // We may have jumped through bitcasts, so the type of the + // BUILD_VECTOR may not match the type of the shuffle. + if (V->getValueType(0) != VT) + NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); + return NewBV; } } @@ -11516,6 +12600,118 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return V; } + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - + // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { + SmallVector<SDValue, 8> Ops; + for (int M : SVN->getMask()) { + SDValue Op = DAG.getUNDEF(VT.getScalarType()); + if (M >= 0) { + int Idx = M % NumElts; + SDValue &S = (M < (int)NumElts ? N0 : N1); + if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) { + Op = S.getOperand(Idx); + } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) { + if (Idx == 0) + Op = S.getOperand(0); + } else { + // Operand can't be combined - bail out. + break; + } + } + Ops.push_back(Op); + } + if (Ops.size() == VT.getVectorNumElements()) { + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + EVT SVT = VT.getScalarType(); + if (SVT.isInteger()) + for (SDValue &Op : Ops) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + if (SVT != VT.getScalarType()) + for (SDValue &Op : Ops) + Op = TLI.isZExtFree(Op.getValueType(), SVT) + ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT) + : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops); + } + } + + // If this shuffle only has a single input that is a bitcasted shuffle, + // attempt to merge the 2 shuffles and suitably bitcast the inputs/output + // back to their original types. + if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && + N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps && + TLI.isTypeLegal(VT)) { + + // Peek through the bitcast only if there is one user. + SDValue BC0 = N0; + while (BC0.getOpcode() == ISD::BITCAST) { + if (!BC0.hasOneUse()) + break; + BC0 = BC0.getOperand(0); + } + + auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) { + if (Scale == 1) + return SmallVector<int, 8>(Mask.begin(), Mask.end()); + + SmallVector<int, 8> NewMask; + for (int M : Mask) + for (int s = 0; s != Scale; ++s) + NewMask.push_back(M < 0 ? -1 : Scale * M + s); + return NewMask; + }; + + if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { + EVT SVT = VT.getScalarType(); + EVT InnerVT = BC0->getValueType(0); + EVT InnerSVT = InnerVT.getScalarType(); + + // Determine which shuffle works with the smaller scalar type. + EVT ScaleVT = SVT.bitsLT(InnerSVT) ? 
VT : InnerVT; + EVT ScaleSVT = ScaleVT.getScalarType(); + + if (TLI.isTypeLegal(ScaleVT) && + 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) && + 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) { + + int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + + // Scale the shuffle masks to the smaller scalar type. + ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0); + SmallVector<int, 8> InnerMask = + ScaleShuffleMask(InnerSVN->getMask(), InnerScale); + SmallVector<int, 8> OuterMask = + ScaleShuffleMask(SVN->getMask(), OuterScale); + + // Merge the shuffle masks. + SmallVector<int, 8> NewMask; + for (int M : OuterMask) + NewMask.push_back(M < 0 ? -1 : InnerMask[M]); + + // Test for shuffle mask legality over both commutations. + SDValue SV0 = BC0->getOperand(0); + SDValue SV1 = BC0->getOperand(1); + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + if (!LegalMask) { + std::swap(SV0, SV1); + ShuffleVectorSDNode::commuteMask(NewMask); + LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + } + + if (LegalMask) { + SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0); + SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1); + return DAG.getNode( + ISD::BITCAST, SDLoc(N), VT, + DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); + } + } + } + } + // Canonicalize shuffles according to rules: // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) @@ -11543,8 +12739,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) // Don't try to fold shuffles with illegal type. - if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - TLI.isTypeLegal(VT)) { + // Only fold if this shuffle is the only user of the other shuffle. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) && + Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); // The incoming shuffle must be of the same type as the result of the @@ -11624,20 +12821,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Avoid introducing shuffles with illegal mask. if (!TLI.isShuffleMaskLegal(Mask, VT)) { - // Compute the commuted shuffle mask and test again. - for (unsigned i = 0; i != NumElts; ++i) { - int idx = Mask[i]; - if (idx < 0) - continue; - else if (idx < (int)NumElts) - Mask[i] = idx + NumElts; - else - Mask[i] = idx - NumElts; - } + ShuffleVectorSDNode::commuteMask(Mask); if (!TLI.isShuffleMaskLegal(Mask, VT)) return SDValue(); - + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) @@ -11653,6 +12841,34 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { + SDValue InVal = N->getOperand(0); + EVT VT = N->getValueType(0); + + // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern + // with a VECTOR_SHUFFLE. + if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + SDValue InVec = InVal->getOperand(0); + SDValue EltNo = InVal->getOperand(1); + + // FIXME: We could support implicit truncation if the shuffle can be + // scaled to a smaller vector scalar type. 
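The ScaleShuffleMask lambda above rescales a mask when the inner and outer shuffles use different scalar widths: each index is multiplied by Scale and expanded into Scale consecutive lanes, with undef (-1) lanes staying undef. The same logic as a freestanding function:

    #include <vector>

    std::vector<int> scaleShuffleMask(const std::vector<int> &Mask, int Scale) {
      std::vector<int> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    }
    // e.g. a v2i32 mask <1, -1> scaled by 2 becomes the v4i16 mask
    // <2, 3, -1, -1>.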
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo); + if (C0 && VT == InVec.getValueType() && + VT.getScalarType() == InVal.getValueType()) { + SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1); + int Elt = C0->getZExtValue(); + NewMask[0] = Elt; + + if (TLI.isShuffleMaskLegal(NewMask, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT), + NewMask); + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N2 = N->getOperand(2); @@ -11680,48 +12896,64 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold (fp_to_fp16 (fp16_to_fp op)) -> op + if (N0->getOpcode() == ISD::FP16_TO_FP) + return N0->getOperand(0); + + return SDValue(); +} + /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT VT = N->getValueType(0); - SDLoc dl(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - if (N->getOpcode() == ISD::AND) { - if (RHS.getOpcode() == ISD::BITCAST) - RHS = RHS.getOperand(0); - if (RHS.getOpcode() == ISD::BUILD_VECTOR) { - SmallVector<int, 8> Indices; - unsigned NumElts = RHS.getNumOperands(); - for (unsigned i = 0; i != NumElts; ++i) { - SDValue Elt = RHS.getOperand(i); - if (!isa<ConstantSDNode>(Elt)) - return SDValue(); + SDLoc dl(N); - if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) - Indices.push_back(i); - else if (cast<ConstantSDNode>(Elt)->isNullValue()) - Indices.push_back(NumElts+i); - else - return SDValue(); - } + // Make sure we're not running after operation legalization where it + // may have custom lowered the vector shuffles. + if (LegalOperations) + return SDValue(); - // Let's see if the target supports this vector_shuffle. - EVT RVT = RHS.getValueType(); - if (!TLI.isVectorClearMaskLegal(Indices, RVT)) - return SDValue(); + if (N->getOpcode() != ISD::AND) + return SDValue(); + + if (RHS.getOpcode() == ISD::BITCAST) + RHS = RHS.getOperand(0); - // Return the new VECTOR_SHUFFLE node. - EVT EltVT = RVT.getVectorElementType(); - SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps); - LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); - SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); - return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); + if (RHS.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<int, 8> Indices; + unsigned NumElts = RHS.getNumOperands(); + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Elt = RHS.getOperand(i); + if (isAllOnesConstant(Elt)) + Indices.push_back(i); + else if (isNullConstant(Elt)) + Indices.push_back(NumElts+i); + else + return SDValue(); } + + // Let's see if the target supports this vector_shuffle. + EVT RVT = RHS.getValueType(); + if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + return SDValue(); + + // Return the new VECTOR_SHUFFLE node. 
+ EVT EltVT = RVT.getVectorElementType(); + SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), + DAG.getConstant(0, dl, EltVT)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps); + LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); + SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); + return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); } return SDValue(); @@ -11734,8 +12966,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - SDValue Shuffle = XformToShuffleWithZero(N); - if (Shuffle.getNode()) return Shuffle; + + if (SDValue Shuffle = XformToShuffleWithZero(N)) + return Shuffle; // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold // this operation. @@ -11754,9 +12987,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // Can't fold divide by zero. if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || N->getOpcode() == ISD::FDIV) { - if ((RHSOp.getOpcode() == ISD::Constant && - cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) || - (RHSOp.getOpcode() == ISD::ConstantFP && + if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP && cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero())) break; } @@ -11813,38 +13044,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { return SDValue(); } -/// Visit a binary vector operation, like FABS/FNEG. -SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { - assert(N->getValueType(0).isVector() && - "SimplifyVUnaryOp only works on vectors!"); - - SDValue N0 = N->getOperand(0); - - if (N0.getOpcode() != ISD::BUILD_VECTOR) - return SDValue(); - - // Operand is a BUILD_VECTOR node, see if we can constant fold it. - SmallVector<SDValue, 8> Ops; - for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { - SDValue Op = N0.getOperand(i); - if (Op.getOpcode() != ISD::UNDEF && - Op.getOpcode() != ISD::ConstantFP) - break; - EVT EltVT = Op.getValueType(); - SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op); - if (FoldOp.getOpcode() != ISD::UNDEF && - FoldOp.getOpcode() != ISD::ConstantFP) - break; - Ops.push_back(FoldOp); - AddToWorklist(FoldOp.getNode()); - } - - if (Ops.size() != N0.getNumOperands()) - return SDValue(); - - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops); -} - SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2){ assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); @@ -11881,6 +13080,38 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { + // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) + // The select + setcc is redundant, because fsqrt returns NaN for X < -0. 
+  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
+    if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
+      // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
+      SDValue Sqrt = RHS;
+      ISD::CondCode CC;
+      SDValue CmpLHS;
+      const ConstantFPSDNode *NegZero = nullptr;
+
+      if (TheSelect->getOpcode() == ISD::SELECT_CC) {
+        CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
+        CmpLHS = TheSelect->getOperand(0);
+        NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1));
+      } else {
+        // SELECT or VSELECT
+        SDValue Cmp = TheSelect->getOperand(0);
+        if (Cmp.getOpcode() == ISD::SETCC) {
+          CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
+          CmpLHS = Cmp.getOperand(0);
+          NegZero = isConstOrConstSplatFP(Cmp.getOperand(1));
+        }
+      }
+      if (NegZero && NegZero->isNegative() && NegZero->isZero() &&
+          Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
+          CC == ISD::SETULT || CC == ISD::SETLT)) {
+        // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
+        CombineTo(TheSelect, Sqrt);
+        return true;
+      }
+    }
+  }
   // Cannot simplify select with vector condition
   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
 
@@ -11902,6 +13133,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
     if (LHS.getOperand(0) != RHS.getOperand(0) ||
         // Do not let this transformation reduce the number of volatile loads.
         LLD->isVolatile() || RLD->isVolatile() ||
+        // FIXME: If either is a pre/post inc/dec load,
+        // we'd need to split out the address adjustment.
+        LLD->isIndexed() || RLD->isIndexed() ||
         // If this is an EXTLOAD, the VT's must match.
         LLD->getMemoryVT() != RLD->getMemoryVT() ||
         // If this is an EXTLOAD, the kind of extension must match.
@@ -12003,20 +13237,17 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
   EVT VT = N2.getValueType();
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
-  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
 
   // Determine if the condition we're dealing with is constant
   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                               N0, N1, CC, DL, false);
   if (SCC.getNode()) AddToWorklist(SCC.getNode());
-  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
 
-  // fold select_cc true, x, y -> x
-  if (SCCC && !SCCC->isNullValue())
-    return N2;
-  // fold select_cc false, x, y -> y
-  if (SCCC && SCCC->isNullValue())
-    return N3;
+  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+    // fold select_cc true, x, y -> x
+    // fold select_cc false, x, y -> y
+    return !SCCC->isNullValue() ? N2 : N3;
+  }
 
   // Check to see if we can simplify the select into an fabs node
   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
@@ -12069,9 +13300,9 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
 
       // Get the offsets to the 0 and 1 element of the array so that we can
       // select between them.
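The fsqrt fold added at the top of this hunk leans on IEEE-754 semantics: sqrt already returns NaN for any operand that compares less than -0.0, so the guarding compare and select are redundant. The scalar shape of the pattern being collapsed (a sketch; NaN payloads may differ, but both branches are NaN for negative inputs):

    #include <cmath>

    float guardedSqrt(float X) {
      // (select (setcc x, -0.0, olt), NaN, (fsqrt x)) == (fsqrt x)
      return X < -0.0f ? std::nanf("") : std::sqrt(X);
    }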
- SDValue Zero = DAG.getIntPtrConstant(0); + SDValue Zero = DAG.getIntPtrConstant(0, DL); unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); - SDValue One = DAG.getIntPtrConstant(EltSize); + SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), @@ -12086,24 +13317,23 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, false, Alignment); - } } // Check to see if we can perform the "gzip trick", transforming // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) - if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && - (N1C->isNullValue() || // (a < 0) ? b : 0 - (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 + if (isNullConstant(N3) && CC == ISD::SETLT && + (isNullConstant(N1) || // (a < 0) ? b : 0 + (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0 EVT XType = N0.getValueType(); EVT AType = N2.getValueType(); if (XType.bitsGE(AType)) { // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a // single-bit constant. - if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) { + if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { unsigned ShCtV = N2C->getAPIntValue().logBase2(); - ShCtV = XType.getSizeInBits()-ShCtV-1; - SDValue ShCt = DAG.getConstant(ShCtV, + ShCtV = XType.getSizeInBits() - ShCtV - 1; + SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0), getShiftAmountTy(N0.getValueType())); SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, ShCt); @@ -12119,7 +13349,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, + DAG.getConstant(XType.getSizeInBits() - 1, + SDLoc(N0), getShiftAmountTy(N0.getValueType()))); AddToWorklist(Shift.getNode()); @@ -12139,23 +13370,21 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // single bit-test can be materialized as an all-ones register with // shift-left and shift-right-arith. if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && - N0->getValueType(0) == VT && - N1C && N1C->isNullValue() && - N2C && N2C->isNullValue()) { + N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { SDValue AndLHS = N0->getOperand(0); ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { // Shift the tested bit over the sign bit. APInt AndMask = ConstAndRHS->getAPIntValue(); SDValue ShlAmt = - DAG.getConstant(AndMask.countLeadingZeros(), + DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), getShiftAmountTy(AndLHS.getValueType())); SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); // Now arithmetic right shift it all the way over, so the result is either // all-ones, or zero. 
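The "gzip trick" above turns (x < 0) ? a : 0 into a branch-free and/sra pair: an arithmetic right shift by width-1 yields all ones exactly when x is negative, and that mask then selects a. The single-bit variant that follows uses the same all-ones-mask idea. A scalar sketch (assuming 32-bit operands and an arithmetic >> on signed values, which the SRA node guarantees):

    #include <cstdint>

    uint32_t selectIfNegative(int32_t X, uint32_t A) {
      uint32_t Mask = static_cast<uint32_t>(X >> 31); // 0 or 0xFFFFFFFF
      return Mask & A;                                // A if X < 0, else 0
    }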
SDValue ShrAmt = - DAG.getConstant(AndMask.getBitWidth()-1, + DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), getShiftAmountTy(Shl.getValueType())); SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); @@ -12164,13 +13393,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, } // fold select C, 16, 0 -> shl C, 4 - if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && + if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() && TLI.getBooleanContents(N0.getValueType()) == TargetLowering::ZeroOrOneBooleanContent) { // If the caller doesn't want us to simplify this into a zext of a compare, // don't do it. - if (NotExtCompare && N2C->getAPIntValue() == 1) + if (NotExtCompare && N2C->isOne()) return SDValue(); // Get a SetCC of the condition @@ -12198,13 +13427,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, AddToWorklist(SCC.getNode()); AddToWorklist(Temp.getNode()); - if (N2C->getAPIntValue() == 1) + if (N2C->isOne()) return Temp; // shl setcc result by log2 n2c return DAG.getNode( ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), + DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp), getShiftAmountTy(Temp.getValueType()))); } } @@ -12212,7 +13441,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Check to see if this is the equivalent of setcc // FIXME: Turn all of these into setcc if setcc if setcc is legal // otherwise, go ahead with the folds. - if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { + if (0 && isNullConstant(N3) && isOneConstant(N2)) { EVT XType = N0.getValueType(); if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) { @@ -12223,30 +13452,34 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, } // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) - if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && + if (isNullConstant(N1) && CC == ISD::SETEQ && (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, XType))) { SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0); return DAG.getNode(ISD::SRL, DL, XType, Ctlz, DAG.getConstant(Log2_32(XType.getSizeInBits()), + SDLoc(Ctlz), getShiftAmountTy(Ctlz.getValueType()))); } // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) - if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { - SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0), - XType, DAG.getConstant(0, XType), N0); - SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType); + if (isNullConstant(N1) && CC == ISD::SETGT) { + SDLoc DL(N0); + SDValue NegN0 = DAG.getNode(ISD::SUB, DL, + XType, DAG.getConstant(0, DL, XType), N0); + SDValue NotN0 = DAG.getNOT(DL, N0, XType); return DAG.getNode(ISD::SRL, DL, XType, DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), - DAG.getConstant(XType.getSizeInBits()-1, + DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(XType))); } // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) - if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { - SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, + if (isAllOnesConstant(N1) && CC == ISD::SETGT) { + SDLoc DL(N0); + SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0, + DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); - return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); + return DAG.getNode(ISD::XOR, DL, XType, Sign, 
DAG.getConstant(1, DL, + XType)); } } @@ -12269,11 +13502,12 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, EVT XType = N0.getValueType(); if (SubC && SubC->isNullValue() && XType.isInteger()) { - SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, + SDLoc DL(N0); + SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, + DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); - SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), + SDValue Add = DAG.getNode(ISD::ADD, DL, XType, N0, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -12303,7 +13537,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { return SDValue(); // Avoid division by zero. - if (!C->getAPIntValue()) + if (C->isNullValue()) return SDValue(); std::vector<SDNode*> Built; @@ -12323,7 +13557,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { return SDValue(); // Avoid division by zero. - if (!C->getAPIntValue()) + if (C->isNullValue()) return SDValue(); std::vector<SDNode *> Built; @@ -12344,7 +13578,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return SDValue(); // Avoid division by zero. - if (!C->getAPIntValue()) + if (C->isNullValue()) return SDValue(); std::vector<SDNode*> Built; @@ -12374,7 +13608,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { // does not require additional intermediate precision] EVT VT = Op.getValueType(); SDLoc DL(Op); - SDValue FPOne = DAG.getConstantFP(1.0, VT); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); AddToWorklist(Est.getNode()); @@ -12409,7 +13643,7 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); - SDValue ThreeHalves = DAG.getConstantFP(1.5, VT); + SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. 
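The ThreeHalves constant introduced above feeds the one-constant Newton-Raphson refinement for reciprocal square root: Est' = Est * (1.5 - 0.5 * Arg * Est * Est), where 0.5 * Arg is rewritten as (1.5 * Arg - Arg) so that 1.5 is the only FP immediate the sequence materializes. A scalar sketch of the iteration:

    float refineRsqrt(float Arg, float Est, unsigned Iterations) {
      const float ThreeHalves = 1.5f;
      float HalfArg = ThreeHalves * Arg - Arg;  // 0.5 * Arg, reusing 1.5
      for (unsigned i = 0; i < Iterations; ++i)
        Est = Est * (ThreeHalves - HalfArg * Est * Est);
      return Est;
    }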
@@ -12445,8 +13679,8 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); - SDValue MinusThree = DAG.getConstantFP(-3.0, VT); - SDValue MinusHalf = DAG.getConstantFP(-0.5, VT); + SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); + SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT); // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index c46539b..0351c33 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -44,6 +44,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -61,8 +62,8 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -424,7 +425,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { // Check if the second operand is a constant and handle it appropriately. if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint64_t Imm = CI->getZExtValue(); + uint64_t Imm = CI->getSExtValue(); // Transform "sdiv exact X, 8" -> "sra X, 3". if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) && @@ -710,7 +711,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) { CallingConv::ID CC = I->getCallingConv(); bool IsAnyRegCC = CC == CallingConv::AnyReg; bool HasDef = !I->getType()->isVoidTy(); - Value *Callee = I->getOperand(PatchPointOpers::TargetPos); + Value *Callee = I->getOperand(PatchPointOpers::TargetPos)->stripPointerCasts(); // Get the real number of arguments participating in the call <numArgs> assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) && @@ -756,23 +757,25 @@ bool FastISel::selectPatchpoint(const CallInst *I) { cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); - // Assume that the callee is a constant address or null pointer. - // FIXME: handle function symbols in the future. - uint64_t CalleeAddr; - if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) - CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue(); - else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) { - if (C->getOpcode() == Instruction::IntToPtr) - CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue(); - else + // Add the call target. 
+ if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) { + uint64_t CalleeConstAddr = + cast<ConstantInt>(C->getOperand(0))->getZExtValue(); + Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr)); + } else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) { + if (C->getOpcode() == Instruction::IntToPtr) { + uint64_t CalleeConstAddr = + cast<ConstantInt>(C->getOperand(0))->getZExtValue(); + Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr)); + } else llvm_unreachable("Unsupported ConstantExpr."); + } else if (const auto *GV = dyn_cast<GlobalValue>(Callee)) { + Ops.push_back(MachineOperand::CreateGA(GV, 0)); } else if (isa<ConstantPointerNull>(Callee)) - CalleeAddr = 0; + Ops.push_back(MachineOperand::CreateImm(0)); else llvm_unreachable("Unsupported callee address."); - Ops.push_back(MachineOperand::CreateImm(CalleeAddr)); - // Adjust <numArgs> to account for any arguments that have been passed on // the stack instead. unsigned NumCallRegArgs = IsAnyRegCC ? NumArgs : CLI.OutRegs.size(); @@ -801,7 +804,8 @@ bool FastISel::selectPatchpoint(const CallInst *I) { return false; // Push the register mask info. - Ops.push_back(MachineOperand::CreateRegMask(TRI.getCallPreservedMask(CC))); + Ops.push_back(MachineOperand::CreateRegMask( + TRI.getCallPreservedMask(*FuncInfo.MF, CC))); // Add scratch registers as implicit def and early clobber. const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); @@ -1077,12 +1081,17 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // The donothing intrinsic does, well, nothing. case Intrinsic::donothing: return true; + case Intrinsic::eh_actions: { + unsigned ResultReg = getRegForValue(UndefValue::get(II->getType())); + if (!ResultReg) + return false; + updateValueMap(II, ResultReg); + return true; + } case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); - DIVariable DIVar(DI->getVariable()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) { + assert(DI->getVariable() && "Missing variable"); + if (!FuncInfo.MF->getMMI().hasDebugInfo()) { DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; } @@ -1122,6 +1131,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { false); if (Op) { + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); if (Op->isReg()) { Op->setIsDebug(true); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1146,6 +1157,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { const DbgValueInst *DI = cast<DbgValueInst>(II); const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. 
@@ -1580,7 +1593,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo, bool SkipTargetIndependentISel) : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), - TM(FuncInfo.MF->getTarget()), DL(*MF->getSubtarget().getDataLayout()), + TM(FuncInfo.MF->getTarget()), DL(*TM.getDataLayout()), TII(*MF->getSubtarget().getInstrInfo()), TLI(*MF->getSubtarget().getTargetLowering()), TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo), @@ -1662,6 +1675,7 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, if (ResultReg) return ResultReg; unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm); + bool IsImmKill = true; if (!MaterialReg) { // This is a bit ugly/slow, but failing here means falling out of // fast-isel, which would be very slow. @@ -1670,9 +1684,15 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); if (!MaterialReg) return 0; + // FIXME: If the materialized register here has no uses yet then this + // will be the first use and we should be able to mark it as killed. + // However, the local value area for materialising constant expressions + // grows down, not up, which means that any constant expressions we generate + // later which also use 'Imm' could be after this instruction and therefore + // after this kill. + IsImmKill = false; } - return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, - /*IsKill=*/true); + return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill); } unsigned FastISel::createResultReg(const TargetRegisterClass *RC) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 19aca6e..7b5b8c4 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" @@ -31,6 +32,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -78,12 +80,40 @@ static ISD::NodeType getPreferredExtendForValue(const Value *V) { return ExtendKind; } +namespace { +struct WinEHNumbering { + WinEHNumbering(WinEHFuncInfo &FuncInfo) : FuncInfo(FuncInfo), + CurrentBaseState(-1), NextState(0) {} + + WinEHFuncInfo &FuncInfo; + int CurrentBaseState; + int NextState; + + SmallVector<std::unique_ptr<ActionHandler>, 4> HandlerStack; + SmallPtrSet<const Function *, 4> VisitedHandlers; + + int currentEHNumber() const { + return HandlerStack.empty() ? 
CurrentBaseState : HandlerStack.back()->getEHState(); + } + + void createUnwindMapEntry(int ToState, ActionHandler *AH); + void createTryBlockMapEntry(int TryLow, int TryHigh, + ArrayRef<CatchHandler *> Handlers); + void processCallSite(MutableArrayRef<std::unique_ptr<ActionHandler>> Actions, + ImmutableCallSite CS); + void popUnmatchedActions(int FirstMismatch); + void calculateStateNumbers(const Function &F); + void findActionRootLPads(const Function &F); +}; +} + void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, SelectionDAG *DAG) { Fn = &fn; MF = &mf; TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); + MachineModuleInfo &MMI = MF->getMMI(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; @@ -133,16 +163,17 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, ImmutableCallSite CS(I); if (isa<InlineAsm>(CS.getCalledValue())) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); std::vector<TargetLowering::AsmOperandInfo> Ops = - TLI->ParseConstraints(CS); + TLI->ParseConstraints(TRI, CS); for (size_t I = 0, E = Ops.size(); I != E; ++I) { TargetLowering::AsmOperandInfo &Op = Ops[I]; if (Op.Type == InlineAsm::isClobber) { // Clobbers don't have SDValue operands, hence SDValue(). TLI->ComputeConstraintToUse(Op, SDValue(), DAG); std::pair<unsigned, const TargetRegisterClass *> PhysReg = - TLI->getRegForInlineAsmConstraint(Op.ConstraintCode, - Op.ConstraintVT); + TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode, + Op.ConstraintVT); if (PhysReg.first == SP) MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true); } @@ -176,13 +207,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // during the initial isel pass through the IR so that it is done // in a predictable order. if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { - MachineModuleInfo &MMI = MF->getMMI(); - DIVariable DIVar(DI->getVariable()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (MMI.hasDebugInfo() && - DIVar && - !DI->getDebugLoc().isUnknown()) { + assert(DI->getVariable() && "Missing variable"); + assert(DI->getDebugLoc() && "Missing location"); + if (MMI.hasDebugInfo()) { // Don't handle byval struct arguments or VLAs, for example. // Non-byval arguments are handled here (they refer to the stack // temporary alloca at this point). @@ -249,9 +276,414 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } // Mark landing pad blocks. - for (BB = Fn->begin(); BB != EB; ++BB) - if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) + SmallVector<const LandingPadInst *, 4> LPads; + for (BB = Fn->begin(); BB != EB; ++BB) { + if (const auto *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); + if (BB->isLandingPad()) + LPads.push_back(BB->getLandingPadInst()); + } + + // If this is an MSVC EH personality, we need to do a bit more work. 
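// For orientation, and not taken from this commit: the MSVC C++ personality
// labels ranges of code with EH states, where -1 means "no enclosing
// handler". CurrentBaseState therefore starts at -1, and currentEHNumber()
// reports the state of the innermost entry on HandlerStack. A hypothetical
// nesting and the states this numbering aims to produce:
//
//   try {            // invokes here carry state 0
//     try {          // invokes here carry state 1
//       mayThrow();
//     } catch (A &) { }
//   } catch (B &) { }
//
// Unwinding from state 1 proceeds to state 0, and from state 0 out of the
// frame (-1).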
+ EHPersonality Personality = EHPersonality::Unknown; + if (!LPads.empty()) + Personality = classifyEHPersonality(LPads.back()->getPersonalityFn()); + if (!isMSVCEHPersonality(Personality)) + return; + + WinEHFuncInfo *EHInfo = nullptr; + if (Personality == EHPersonality::MSVC_Win64SEH) { + addSEHHandlersForLPads(LPads); + } else if (Personality == EHPersonality::MSVC_CXX) { + const Function *WinEHParentFn = MMI.getWinEHParent(&fn); + EHInfo = &MMI.getWinEHFuncInfo(WinEHParentFn); + if (EHInfo->LandingPadStateMap.empty()) { + WinEHNumbering Num(*EHInfo); + Num.findActionRootLPads(*WinEHParentFn); + // The VisitedHandlers list is used by both findActionRootLPads and + // calculateStateNumbers, but both functions need to visit all handlers. + Num.VisitedHandlers.clear(); + Num.calculateStateNumbers(*WinEHParentFn); + // Pop everything on the handler stack. + // It may be necessary to call this more than once because a handler can + // be pushed on the stack as a result of clearing the stack. + while (!Num.HandlerStack.empty()) + Num.processCallSite(None, ImmutableCallSite()); + } + + // Copy the state numbers to LandingPadInfo for the current function, which + // could be a handler or the parent. + for (const LandingPadInst *LP : LPads) { + MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; + MMI.addWinEHState(LPadMBB, EHInfo->LandingPadStateMap[LP]); + } + } +} + +void FunctionLoweringInfo::addSEHHandlersForLPads( + ArrayRef<const LandingPadInst *> LPads) { + MachineModuleInfo &MMI = MF->getMMI(); + + // Iterate over all landing pads with llvm.eh.actions calls. + for (const LandingPadInst *LP : LPads) { + const IntrinsicInst *ActionsCall = + dyn_cast<IntrinsicInst>(LP->getNextNode()); + if (!ActionsCall || + ActionsCall->getIntrinsicID() != Intrinsic::eh_actions) + continue; + + // Parse the llvm.eh.actions call we found. + MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; + SmallVector<std::unique_ptr<ActionHandler>, 4> Actions; + parseEHActions(ActionsCall, Actions); + + // Iterate EH actions from most to least precedence, which means + // iterating in reverse. + for (auto I = Actions.rbegin(), E = Actions.rend(); I != E; ++I) { + ActionHandler *Action = I->get(); + if (auto *CH = dyn_cast<CatchHandler>(Action)) { + const auto *Filter = + dyn_cast<Function>(CH->getSelector()->stripPointerCasts()); + assert((Filter || CH->getSelector()->isNullValue()) && + "expected function or catch-all"); + const auto *RecoverBA = + cast<BlockAddress>(CH->getHandlerBlockOrFunc()); + MMI.addSEHCatchHandler(LPadMBB, Filter, RecoverBA); + } else { + assert(isa<CleanupHandler>(Action)); + const auto *Fini = cast<Function>(Action->getHandlerBlockOrFunc()); + MMI.addSEHCleanupHandler(LPadMBB, Fini); + } + } + } +} + +void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) { + WinEHUnwindMapEntry UME; + UME.ToState = ToState; + if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH)) + UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc()); + else + UME.Cleanup = nullptr; + FuncInfo.UnwindMap.push_back(UME); +} + +void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh, + ArrayRef<CatchHandler *> Handlers) { + // See if we already have an entry for this set of handlers. + // This is using iterators rather than a range-based for loop because + // if we find the entry we're looking for we'll need the iterator to erase it. 
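// The scan below treats two try-block entries as equivalent when their
// handler arrays have the same length and the same outlined handlers in the
// same order. An equivalent formulation with std::equal (a sketch only; the
// code keeps the explicit loop so the iterator survives for the erase):
//
//   bool Same = Entry.HandlerArray.size() == Handlers.size() &&
//               std::equal(Handlers.begin(), Handlers.end(),
//                          Entry.HandlerArray.begin(),
//                          [](CatchHandler *CH, const WinEHHandlerType &HT) {
//                            return HT.Handler == CH->getHandlerBlockOrFunc();
//                          });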
+ int NumHandlers = Handlers.size(); + auto I = FuncInfo.TryBlockMap.begin(); + auto E = FuncInfo.TryBlockMap.end(); + for ( ; I != E; ++I) { + auto &Entry = *I; + if (Entry.HandlerArray.size() != (size_t)NumHandlers) + continue; + int N; + for (N = 0; N < NumHandlers; ++N) { + if (Entry.HandlerArray[N].Handler != Handlers[N]->getHandlerBlockOrFunc()) + break; // breaks out of inner loop + } + // If all the handlers match, this is what we were looking for. + if (N == NumHandlers) { + break; + } + } + + // If we found an existing entry for this set of handlers, extend the range + // but move the entry to the end of the map vector. The order of entries + // in the map is critical to the way that the runtime finds handlers. + // FIXME: Depending on what has happened with block ordering, this may + // incorrectly combine entries that should remain separate. + if (I != E) { + // Copy the existing entry. + WinEHTryBlockMapEntry Entry = *I; + Entry.TryLow = std::min(TryLow, Entry.TryLow); + Entry.TryHigh = std::max(TryHigh, Entry.TryHigh); + assert(Entry.TryLow <= Entry.TryHigh); + // Erase the old entry and add this one to the back. + FuncInfo.TryBlockMap.erase(I); + FuncInfo.TryBlockMap.push_back(Entry); + return; + } + + // If we didn't find an entry, create a new one. + WinEHTryBlockMapEntry TBME; + TBME.TryLow = TryLow; + TBME.TryHigh = TryHigh; + assert(TBME.TryLow <= TBME.TryHigh); + for (CatchHandler *CH : Handlers) { + WinEHHandlerType HT; + if (CH->getSelector()->isNullValue()) { + HT.Adjectives = 0x40; + HT.TypeDescriptor = nullptr; + } else { + auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts()); + // Selectors are always pointers to GlobalVariables with 'struct' type. + // The struct has two fields, adjectives and a type descriptor. + auto *CS = cast<ConstantStruct>(GV->getInitializer()); + HT.Adjectives = + cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue(); + HT.TypeDescriptor = + cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts()); + } + HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc()); + HT.CatchObjRecoverIdx = CH->getExceptionVarIndex(); + TBME.HandlerArray.push_back(HT); + } + FuncInfo.TryBlockMap.push_back(TBME); +} + +static void print_name(const Value *V) { +#ifndef NDEBUG + if (!V) { + DEBUG(dbgs() << "null"); + return; + } + + if (const auto *F = dyn_cast<Function>(V)) + DEBUG(dbgs() << F->getName()); + else + DEBUG(V->dump()); +#endif +} + +void WinEHNumbering::processCallSite( + MutableArrayRef<std::unique_ptr<ActionHandler>> Actions, + ImmutableCallSite CS) { + DEBUG(dbgs() << "processCallSite (EH state = " << currentEHNumber() + << ") for: "); + print_name(CS ? CS.getCalledValue() : nullptr); + DEBUG(dbgs() << '\n'); + + DEBUG(dbgs() << "HandlerStack: \n"); + for (int I = 0, E = HandlerStack.size(); I < E; ++I) { + DEBUG(dbgs() << " "); + print_name(HandlerStack[I]->getHandlerBlockOrFunc()); + DEBUG(dbgs() << '\n'); + } + DEBUG(dbgs() << "Actions: \n"); + for (int I = 0, E = Actions.size(); I < E; ++I) { + DEBUG(dbgs() << " "); + print_name(Actions[I]->getHandlerBlockOrFunc()); + DEBUG(dbgs() << '\n'); + } + int FirstMismatch = 0; + for (int E = std::min(HandlerStack.size(), Actions.size()); FirstMismatch < E; + ++FirstMismatch) { + if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() != + Actions[FirstMismatch]->getHandlerBlockOrFunc()) + break; + } + + // Remove unmatched actions from the stack and process their EH states. 
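// The prefix scan above computes the length of the common prefix of the
// handler stack and the incoming action list; std::mismatch expresses the
// same thing (a sketch; the explicit loop is what the code uses):
//
//   size_t Common = std::min(HandlerStack.size(), Actions.size());
//   auto Mis = std::mismatch(
//       HandlerStack.begin(), HandlerStack.begin() + Common, Actions.begin(),
//       [](const std::unique_ptr<ActionHandler> &L,
//          const std::unique_ptr<ActionHandler> &R) {
//         return L->getHandlerBlockOrFunc() == R->getHandlerBlockOrFunc();
//       });
//   int FirstMismatch = Mis.first - HandlerStack.begin();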
+ popUnmatchedActions(FirstMismatch); + + DEBUG(dbgs() << "Pushing actions for CallSite: "); + print_name(CS ? CS.getCalledValue() : nullptr); + DEBUG(dbgs() << '\n'); + + bool LastActionWasCatch = false; + const LandingPadInst *LastRootLPad = nullptr; + for (size_t I = FirstMismatch; I != Actions.size(); ++I) { + // We can reuse eh states when pushing two catches for the same invoke. + bool CurrActionIsCatch = isa<CatchHandler>(Actions[I].get()); + auto *Handler = cast<Function>(Actions[I]->getHandlerBlockOrFunc()); + // Various conditions can lead to a handler being popped from the + // stack and re-pushed later. That shouldn't create a new state. + // FIXME: Can code optimization lead to re-used handlers? + if (FuncInfo.HandlerEnclosedState.count(Handler)) { + // If we already assigned the state enclosed by this handler re-use it. + Actions[I]->setEHState(FuncInfo.HandlerEnclosedState[Handler]); + continue; + } + const LandingPadInst* RootLPad = FuncInfo.RootLPad[Handler]; + if (CurrActionIsCatch && LastActionWasCatch && RootLPad == LastRootLPad) { + DEBUG(dbgs() << "setEHState for handler to " << currentEHNumber() << "\n"); + Actions[I]->setEHState(currentEHNumber()); + } else { + DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() << ", "); + print_name(Actions[I]->getHandlerBlockOrFunc()); + DEBUG(dbgs() << ") with EH state " << NextState << "\n"); + createUnwindMapEntry(currentEHNumber(), Actions[I].get()); + DEBUG(dbgs() << "setEHState for handler to " << NextState << "\n"); + Actions[I]->setEHState(NextState); + NextState++; + } + HandlerStack.push_back(std::move(Actions[I])); + LastActionWasCatch = CurrActionIsCatch; + LastRootLPad = RootLPad; + } + + // This is used to defer numbering states for a handler until after the + // last time it appears in an invoke action list. + if (CS.isInvoke()) { + for (int I = 0, E = HandlerStack.size(); I < E; ++I) { + auto *Handler = cast<Function>(HandlerStack[I]->getHandlerBlockOrFunc()); + if (FuncInfo.LastInvoke[Handler] != cast<InvokeInst>(CS.getInstruction())) + continue; + FuncInfo.LastInvokeVisited[Handler] = true; + DEBUG(dbgs() << "Last invoke of "); + print_name(Handler); + DEBUG(dbgs() << " has been visited.\n"); + } + } + + DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: "); + print_name(CS ? CS.getCalledValue() : nullptr); + DEBUG(dbgs() << '\n'); +} + +void WinEHNumbering::popUnmatchedActions(int FirstMismatch) { + // Don't recurse while we are looping over the handler stack. Instead, defer + // the numbering of the catch handlers until we are done popping. 
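// The state-sharing rule in the push loop above: consecutive catch actions
// whose handlers hang off the same root landing pad guard the same invoke
// range, so only the first allocates a fresh state. Illustrative source:
//
//   try { mayThrow(); }
//   catch (A &) { }   // unwind map entry, new state N
//   catch (B &) { }   // same root landing pad, reuses state N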
+ SmallVector<CatchHandler *, 4> PoppedCatches; + for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) { + std::unique_ptr<ActionHandler> Handler = HandlerStack.pop_back_val(); + if (isa<CatchHandler>(Handler.get())) + PoppedCatches.push_back(cast<CatchHandler>(Handler.release())); + } + + int TryHigh = NextState - 1; + int LastTryLowIdx = 0; + for (int I = 0, E = PoppedCatches.size(); I != E; ++I) { + CatchHandler *CH = PoppedCatches[I]; + DEBUG(dbgs() << "Popped handler with state " << CH->getEHState() << "\n"); + if (I + 1 == E || CH->getEHState() != PoppedCatches[I + 1]->getEHState()) { + int TryLow = CH->getEHState(); + auto Handlers = + makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1); + DEBUG(dbgs() << "createTryBlockMapEntry(" << TryLow << ", " << TryHigh); + for (size_t J = 0; J < Handlers.size(); ++J) { + DEBUG(dbgs() << ", "); + print_name(Handlers[J]->getHandlerBlockOrFunc()); + } + DEBUG(dbgs() << ")\n"); + createTryBlockMapEntry(TryLow, TryHigh, Handlers); + LastTryLowIdx = I + 1; + } + } + + for (CatchHandler *CH : PoppedCatches) { + if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc())) { + if (FuncInfo.LastInvokeVisited[F]) { + DEBUG(dbgs() << "Assigning base state " << NextState << " to "); + print_name(F); + DEBUG(dbgs() << '\n'); + FuncInfo.HandlerBaseState[F] = NextState; + DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() + << ", null)\n"); + createUnwindMapEntry(currentEHNumber(), nullptr); + ++NextState; + calculateStateNumbers(*F); + } + else { + DEBUG(dbgs() << "Deferring handling of "); + print_name(F); + DEBUG(dbgs() << " until last invoke visited.\n"); + } + } + delete CH; + } +} + +void WinEHNumbering::calculateStateNumbers(const Function &F) { + auto I = VisitedHandlers.insert(&F); + if (!I.second) + return; // We've already visited this handler, don't renumber it. + + int OldBaseState = CurrentBaseState; + if (FuncInfo.HandlerBaseState.count(&F)) { + CurrentBaseState = FuncInfo.HandlerBaseState[&F]; + } + + size_t SavedHandlerStackSize = HandlerStack.size(); + + DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n'); + SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; + for (const BasicBlock &BB : F) { + for (const Instruction &I : BB) { + const auto *CI = dyn_cast<CallInst>(&I); + if (!CI || CI->doesNotThrow()) + continue; + processCallSite(None, CI); + } + const auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); + if (!II) + continue; + const LandingPadInst *LPI = II->getLandingPadInst(); + auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode()); + if (!ActionsCall) + continue; + assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions); + parseEHActions(ActionsCall, ActionList); + if (ActionList.empty()) + continue; + processCallSite(ActionList, II); + ActionList.clear(); + FuncInfo.LandingPadStateMap[LPI] = currentEHNumber(); + DEBUG(dbgs() << "Assigning state " << currentEHNumber() + << " to landing pad at " << LPI->getParent()->getName() + << '\n'); + } + + // Pop any actions that were pushed on the stack for this function. + popUnmatchedActions(SavedHandlerStackSize); + + DEBUG(dbgs() << "Assigning max state " << NextState - 1 + << " to " << F.getName() << '\n'); + FuncInfo.CatchHandlerMaxState[&F] = NextState - 1; + + CurrentBaseState = OldBaseState; +} + +// This function follows the same basic traversal as calculateStateNumbers +// but it is necessary to identify the root landing pad associated +// with each action before we start assigning state numbers. 
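// Both this traversal and findActionRootLPads below guard against
// re-entering a handler with the usual set-insert idiom: insert() returns a
// {iterator, inserted} pair, so a false .second short-circuits the visit.
// Generic sketch:
//
//   llvm::SmallPtrSet<const llvm::Function *, 4> Visited;
//   bool shouldVisit(const llvm::Function &F) {
//     return Visited.insert(&F).second;  // false => already processed
//   }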
+void WinEHNumbering::findActionRootLPads(const Function &F) { + auto I = VisitedHandlers.insert(&F); + if (!I.second) + return; // We've already visited this handler, don't revisit it. + + SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; + for (const BasicBlock &BB : F) { + const auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); + if (!II) + continue; + const LandingPadInst *LPI = II->getLandingPadInst(); + auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode()); + if (!ActionsCall) + continue; + + assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions); + parseEHActions(ActionsCall, ActionList); + if (ActionList.empty()) + continue; + for (int I = 0, E = ActionList.size(); I < E; ++I) { + if (auto *Handler + = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) { + FuncInfo.LastInvoke[Handler] = II; + // Don't replace the root landing pad if we previously saw this + // handler in a different function. + if (FuncInfo.RootLPad.count(Handler) && + FuncInfo.RootLPad[Handler]->getParent()->getParent() != &F) + continue; + DEBUG(dbgs() << "Setting root lpad for "); + print_name(Handler); + DEBUG(dbgs() << " to " << LPI->getParent()->getName() << '\n'); + FuncInfo.RootLPad[Handler] = LPI; + } + } + // Walk the actions again and look for nested handlers. This has to + // happen after all of the actions have been processed in the current + // function. + for (int I = 0, E = ActionList.size(); I < E; ++I) + if (auto *Handler + = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) + findActionRootLPads(*Handler); + ActionList.clear(); + } } /// clear - Clear out all the function-specific state. This returns this @@ -274,6 +706,7 @@ void FunctionLoweringInfo::clear() { ByValArgFrameIndexMap.clear(); RegFixups.clear(); StatepointStackSlots.clear(); + StatepointRelocatedValues.clear(); PreferredExtendType.clear(); } @@ -460,8 +893,7 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I, if (FT->isVarArg() && !MMI->usesVAFloatArgument()) { for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { Type* T = I.getArgOperand(i)->getType(); - for (po_iterator<Type*> i = po_begin(T), e = po_end(T); - i != e; ++i) { + for (auto i : post_order(T)) { if (i->isFloatingPointTy()) { MMI->setUsesVAFloatArgument(true); return; @@ -471,60 +903,6 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I, } } -/// AddCatchInfo - Extract the personality and type infos from an eh.selector -/// call, and add them to the specified machine basic block. -void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, - MachineBasicBlock *MBB) { - // Inform the MachineModuleInfo of the personality for this landing pad. - const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1)); - assert(CE->getOpcode() == Instruction::BitCast && - isa<Function>(CE->getOperand(0)) && - "Personality should be a function"); - MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0))); - - // Gather all the type infos for this landing pad and pass them along to - // MachineModuleInfo. 
- std::vector<const GlobalValue *> TyInfo; - unsigned N = I.getNumArgOperands(); - - for (unsigned i = N - 1; i > 1; --i) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) { - unsigned FilterLength = CI->getZExtValue(); - unsigned FirstCatch = i + FilterLength + !FilterLength; - assert(FirstCatch <= N && "Invalid filter length"); - - if (FirstCatch < N) { - TyInfo.reserve(N - FirstCatch); - for (unsigned j = FirstCatch; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); - MMI->addCatchTypeInfo(MBB, TyInfo); - TyInfo.clear(); - } - - if (!FilterLength) { - // Cleanup. - MMI->addCleanup(MBB); - } else { - // Filter. - TyInfo.reserve(FilterLength - 1); - for (unsigned j = i + 1; j < FirstCatch; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); - MMI->addFilterTypeInfo(MBB, TyInfo); - TyInfo.clear(); - } - - N = i; - } - } - - if (N > 2) { - TyInfo.reserve(N - 2); - for (unsigned j = 2; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); - MMI->addCatchTypeInfo(MBB, TyInfo); - } -} - /// AddLandingPadInfo - Extract the exception handling information from the /// landingpad instruction and add them to the specified machine module info. void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index a65f33e..7abc0c4 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -406,10 +406,10 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, Type *Type = CP->getType(); // MachineConstantPool wants an explicit alignment. if (Align == 0) { - Align = MF->getSubtarget().getDataLayout()->getPrefTypeAlignment(Type); + Align = MF->getTarget().getDataLayout()->getPrefTypeAlignment(Type); if (Align == 0) { // Alignment of vector types. FIXME! - Align = MF->getSubtarget().getDataLayout()->getTypeAllocSize(Type); + Align = MF->getTarget().getDataLayout()->getTypeAllocSize(Type); } } @@ -650,6 +650,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MDNode *Var = SD->getVariable(); MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. @@ -951,6 +953,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // Remember to operand index of the group flags. SmallVector<unsigned, 8> GroupIdx; + // Remember registers that are part of early-clobber defs. + SmallVector<unsigned, 8> ECRegs; + // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = @@ -979,6 +984,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + ECRegs.push_back(Reg); } break; case InlineAsm::Kind_RegUse: // Use of register. @@ -1004,6 +1010,19 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, } } + // GCC inline assembly allows input operands to also be early-clobber + // output operands (so long as the operand is written only after it's + // used), but this does not match the semantics of our early-clobber flag. 
+ // If an early-clobber operand register is also an input operand register, + // then remove the early-clobber flag. + for (unsigned Reg : ECRegs) { + if (MIB->readsRegister(Reg, TRI)) { + MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, TRI); + assert(MO && "No def operand for clobbered register?"); + MO->setIsEarlyClobber(false); + } + } + // Get the mdnode from the asm if it exists and add it to the instruction. SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e5473e3..7d98872 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -249,7 +249,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue()); if (!UseCP) { assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion"); - return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), + return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), dl, (VT == MVT::f64) ? MVT::i64 : MVT::i32); } @@ -331,7 +331,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr, MachinePointerInfo(), StoredVT, false, false, 0); - SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy(AS)); + SDValue Increment = DAG.getConstant(RegBytes, dl, TLI.getPointerTy(AS)); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -385,7 +385,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, int IncrementSize = NumBits / 8; // Divide the stored value in two parts. - SDValue ShiftAmount = DAG.getConstant(NumBits, + SDValue ShiftAmount = DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Val.getValueType())); SDValue Lo = Val; SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); @@ -397,7 +397,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, ST->isVolatile(), ST->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, TLI.getPointerTy(AS))); + DAG.getConstant(IncrementSize, dl, TLI.getPointerTy(AS))); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), @@ -448,7 +448,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Make sure the stack slot is also aligned for the register type. 
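// Most hunks from here through the rest of LegalizeDAG.cpp are mechanical:
// the SelectionDAG constant factories (getConstant, getConstantFP,
// getIntPtrConstant) now take a debug location, so each call site threads
// its local SDLoc through. Shape of the change (sketch):
//
//   SDValue Inc = DAG.getConstant(RegBytes, TLI.getPointerTy(AS));      // old
//   SDValue Inc = DAG.getConstant(RegBytes, dl, TLI.getPointerTy(AS));  // new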
SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); - SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SDValue Increment = DAG.getConstant(RegBytes, dl, TLI.getPointerTy()); SmallVector<SDValue, 8> Stores; SDValue StackPtr = StackBase; unsigned Offset = 0; @@ -528,7 +528,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->isNonTemporal(), LD->isInvariant(), Alignment, LD->getAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), @@ -540,7 +540,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->isNonTemporal(), LD->isInvariant(), Alignment, LD->getAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), @@ -549,7 +549,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, } // aggregate the two parts - SDValue ShiftAmount = DAG.getConstant(NumBits, + SDValue ShiftAmount = DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Hi.getValueType())); SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); @@ -596,7 +596,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3); // Add the offset to the index. unsigned EltSize = EltVT.getSizeInBits()/8; - Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT)); + Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3, + DAG.getConstant(EltSize, dl, IdxVT)); SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); // Store the scalar value. Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT, @@ -655,7 +656,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { TLI.isTypeLegal(MVT::i32)) { SDValue Con = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), - MVT::i32); + SDLoc(CFP), MVT::i32); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment, AAInfo); } @@ -664,7 +665,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { // If this target supports 64-bit registers, do a single 64-bit store. if (TLI.isTypeLegal(MVT::i64)) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). - zextOrTrunc(64), MVT::i64); + zextOrTrunc(64), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment, AAInfo); } @@ -673,15 +674,15 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. If the target supports neither 32- nor 64-bits, this // xform is certainly not worth it. 
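// Worked example for the two-store split below: the f64 constant 1.0 has bit
// pattern 0x3FF0000000000000, so IntVal.trunc(32) gives Lo = 0x00000000 and
// IntVal.lshr(32).trunc(32) gives Hi = 0x3FF00000; the isBigEndian() swap
// keeps the memory image identical to a single 64-bit store.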
- const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); - SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32); - SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); + const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(IntVal.trunc(32), dl, MVT::i32); + SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), dl, MVT::i32); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(4, Ptr.getValueType())); + DAG.getConstant(4, dl, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, MinAlign(Alignment, 4U), @@ -731,7 +732,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) + if (Res && Res != SDValue(Node, 0)) ReplaceNode(SDValue(Node, 0), Res); return; } @@ -792,9 +793,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(RoundWidth, + DAG.getConstant(RoundWidth, dl, TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), @@ -805,7 +807,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X // Store the top RoundWidth bits. Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(ExtraWidth, + DAG.getConstant(ExtraWidth, dl, TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, @@ -814,7 +816,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -843,7 +846,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) + if (Res && Res != SDValue(Node, 0)) ReplaceNode(SDValue(Node, 0), Res); return; } @@ -1004,7 +1007,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, isInvariant, @@ -1017,7 +1021,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. 
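// Example of the odd-width loads being legalized in these hunks: on a
// little-endian target an extending i24 load splits into an i16 load at Ptr
// and an i8 load at Ptr + 2 (IncrementSize = RoundWidth / 8), reassembled as
//
//   result = (zext(i8 at Ptr + 2) << 16) | zext(i16 at Ptr)
//
// which is exactly the shift-into-place followed by the OR join.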
Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(RoundWidth,
+ DAG.getConstant(RoundWidth, dl,
TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
@@ -1033,7 +1037,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
@@ -1047,7 +1052,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Move the top bits to the right place.
Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(ExtraWidth,
+ DAG.getConstant(ExtraWidth, dl,
TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
@@ -1240,12 +1245,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
break;
case ISD::EXTRACT_ELEMENT:
case ISD::FLT_ROUNDS_:
- case ISD::SADDO:
- case ISD::SSUBO:
- case ISD::UADDO:
- case ISD::USUBO:
- case ISD::SMULO:
- case ISD::UMULO:
case ISD::FPOWI:
case ISD::MERGE_VALUES:
case ISD::EH_RETURN:
@@ -1437,18 +1436,32 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
unsigned EltSize =
Vec.getValueType().getVectorElementType().getSizeInBits()/8;
Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
- DAG.getConstant(EltSize, Idx.getValueType()));
+ DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));
Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy());
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+ SDValue NewLoad;
+
if (Op.getValueType().isVector())
- return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
- false, false, false, 0);
- return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
- MachinePointerInfo(),
- Vec.getValueType().getVectorElementType(),
- false, false, false, 0);
+ NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,
+ MachinePointerInfo(), false, false, false, 0);
+ else
+ NewLoad = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(),
+ Vec.getValueType().getVectorElementType(), false, false, false, 0);
+
+ // Replace the chain going out of the store with the one out of the load.
+ DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1));
+
+ // We introduced a cycle though, so update the load's operands, making sure
+ // to use the original store's chain as an incoming chain.
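// Why the chain surgery above is needed: the element load must be ordered
// after the store that spilled Vec, so every user of the store's chain Ch is
// redirected to the load's output chain; that rewrite also reaches the
// load's own chain input, which would form a cycle, so the operand update
// that follows restores Ch as the load's incoming chain. Schematically:
//
//   store Vec --Ch--> [users]            becomes
//   store Vec --Ch--> load elt --chain--> [users]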
+ SmallVector<SDValue, 6> NewLoadOperands(NewLoad->op_begin(), + NewLoad->op_end()); + NewLoadOperands[0] = Ch; + NewLoad = + SDValue(DAG.UpdateNodeOperands(NewLoad.getNode(), NewLoadOperands), 0); + return NewLoad; } SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { @@ -1476,7 +1489,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { Vec.getValueType().getVectorElementType().getSizeInBits()/8; Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, - DAG.getConstant(EltSize, Idx.getValueType())); + DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, @@ -1513,7 +1526,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { unsigned Offset = TypeByteSize*i; - SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType()); + SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); // If the destination vector element type is narrower than the source @@ -1575,7 +1588,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, - DAG.getConstant(ByteOffset, LoadPtr.getValueType())); + DAG.getConstant(ByteOffset, dl, + LoadPtr.getValueType())); // Load a legal integer containing the sign bit. SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), false, false, false, 0); @@ -1585,13 +1599,14 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?"); if (BitShift) SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit, - DAG.getConstant(BitShift, + DAG.getConstant(BitShift, dl, TLI.getShiftAmountTy(SignBit.getValueType()))); } } // Now get the sign bit proper, by seeing whether the value is negative. SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()), - SignBit, DAG.getConstant(0, SignBit.getValueType()), + SignBit, + DAG.getConstant(0, dl, SignBit.getValueType()), ISD::SETLT); // Get the absolute value of the result. SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); @@ -1616,8 +1631,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, // Chain the dynamic stack allocation so that it doesn't modify the stack // pointer when other instructions are using the stack. 
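// Worked example for the rounding below, assuming the stack grows down and
// Align is a power of two: with SP = 0x1000, Size = 24, Align = 16,
//
//   0x1000 - 24 = 0xFE8;   0xFE8 & -16 = 0xFE0   // 16-byte aligned
//
// The AND with -(uint64_t)Align clears the low log2(Align) bits, and the
// CALLSEQ_START/CALLSEQ_END pair with zero adjustment keeps other
// instructions off the stack pointer while it is being updated.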
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true), - SDLoc(Node)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); @@ -1628,12 +1642,11 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, - DAG.getConstant(-(uint64_t)Align, VT)); + DAG.getConstant(-(uint64_t)Align, dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain - Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), - DAG.getIntPtrConstant(0, true), SDValue(), - SDLoc(Node)); + Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), + DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); Results.push_back(Tmp1); Results.push_back(Tmp2); @@ -2404,7 +2417,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); // word offset constant for Hi/Lo address computation - SDValue WordOff = DAG.getConstant(sizeof(int), StackSlot.getValueType()); + SDValue WordOff = DAG.getConstant(sizeof(int), dl, + StackSlot.getValueType()); // set up Hi and Lo (into buffer) address based on endian SDValue Hi = StackSlot; SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(), @@ -2416,7 +2430,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0Mapped; if (isSigned) { // constant used to invert sign bit (signed to unsigned mapping) - SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32); + SDValue SignBit = DAG.getConstant(0x80000000u, dl, MVT::i32); Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit); } else { Op0Mapped = Op0; @@ -2426,7 +2440,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Op0Mapped, Lo, MachinePointerInfo(), false, false, 0); // initial hi portion of constructed double - SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32); + SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32); // store the hi of the constructed double - biased exponent SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, MachinePointerInfo(), @@ -2438,7 +2452,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : BitsToDouble(0x4330000000000000ULL), - MVT::f64); + dl, MVT::f64); // subtract the bias SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); // final result @@ -2449,7 +2463,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Result = Sub; } else if (DestVT.bitsLT(MVT::f64)) { Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); } else if (DestVT.bitsGT(MVT::f64)) { Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); } @@ -2465,15 +2479,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // TODO: Generalize this for use with other types. 
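// The magic constants in the i64 -> f64 path below implement the classic
// split-and-bias trick (compare compiler-rt's __floatundidf): as doubles,
// 0x4330000000000000 is 2^52 and 0x4530000000000000 is 2^84. OR'ing the low
// 32 input bits into 2^52's mantissa produces the exact double 2^52 + lo;
// OR'ing the high 32 bits into 2^84's mantissa produces 2^84 + hi * 2^32.
// Subtracting the third constant, 0x4530000000100000 = 2^84 + 2^52, and
// adding the low part recovers the value exactly:
//
//   ((2^84 + hi * 2^32) - (2^84 + 2^52)) + (2^52 + lo) = hi * 2^32 + lo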
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { SDValue TwoP52 = - DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64); + DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64); SDValue TwoP84PlusTwoP52 = - DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64); + DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), dl, + MVT::f64); SDValue TwoP84 = - DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64); + DAG.getConstant(UINT64_C(0x4530000000000000), dl, MVT::i64); SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32); SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, - DAG.getConstant(32, MVT::i64)); + DAG.getConstant(32, dl, MVT::i64)); SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52); SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr); @@ -2492,9 +2507,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); SDValue ShiftConst = - DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType())); + DAG.getConstant(1, dl, TLI.getShiftAmountTy(Op0.getValueType())); SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); - SDValue AndConst = DAG.getConstant(1, MVT::i64); + SDValue AndConst = DAG.getConstant(1, dl, MVT::i64); SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr); @@ -2506,47 +2521,52 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // thing most of the time. This would be a good candidate for a //pseudo-op, or, even better, for whole-function isel. SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), - Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT); + Op0, DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast); } // Otherwise, implement the fully general conversion. 
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, - DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64)); + DAG.getConstant(UINT64_C(0xfffffffffffff800), dl, MVT::i64)); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, - DAG.getConstant(UINT64_C(0x800), MVT::i64)); + DAG.getConstant(UINT64_C(0x800), dl, MVT::i64)); SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, - DAG.getConstant(UINT64_C(0x7ff), MVT::i64)); - SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), - And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE); + DAG.getConstant(UINT64_C(0x7ff), dl, MVT::i64)); + SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2, + DAG.getConstant(UINT64_C(0), dl, MVT::i64), + ISD::SETNE); SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0); - SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), - Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), - ISD::SETUGE); + SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, + DAG.getConstant(UINT64_C(0x0020000000000000), dl, + MVT::i64), + ISD::SETUGE); SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0); EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType()); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, - DAG.getConstant(32, SHVT)); + DAG.getConstant(32, dl, SHVT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh); SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc); SDValue TwoP32 = - DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64); + DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), dl, + MVT::f64); SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt); SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2); SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo); SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); } SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Op0.getValueType()), - Op0, DAG.getConstant(0, Op0.getValueType()), + Op0, + DAG.getConstant(0, dl, Op0.getValueType()), ISD::SETLT); - SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4); + SDValue Zero = DAG.getIntPtrConstant(0, dl), + Four = DAG.getIntPtrConstant(4, dl); SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero); @@ -2681,34 +2701,41 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: - Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); case MVT::i32: - Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); - Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT)); - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT)); + Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp3 
= DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, + DAG.getConstant(0xFF0000, dl, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT)); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); case MVT::i64: - Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT)); - Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT)); - Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); - Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); - Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); - Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT)); - Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT)); - Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT)); - Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT)); - Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT)); - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT)); + Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); + Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); + Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); + Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, + DAG.getConstant(255ULL<<48, dl, VT)); + Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, + DAG.getConstant(255ULL<<40, dl, VT)); + Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, + DAG.getConstant(255ULL<<32, dl, VT)); + Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, + DAG.getConstant(255ULL<<24, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, + DAG.getConstant(255ULL<<16, dl, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, + DAG.getConstant(255ULL<<8 , dl, VT)); Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); @@ -2735,34 +2762,38 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // This is the "best" algorithm from // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel - SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), VT); - SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), VT); - SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), VT); - SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), VT); + SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), + dl, VT); + SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), + dl, VT); + SDValue Mask0F = 
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), + dl, VT); + SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), + dl, VT); // v = v - ((v >> 1) & 0x55555555...) Op = DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::SRL, dl, VT, Op, - DAG.getConstant(1, ShVT)), + DAG.getConstant(1, dl, ShVT)), Mask55)); // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33), DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::SRL, dl, VT, Op, - DAG.getConstant(2, ShVT)), + DAG.getConstant(2, dl, ShVT)), Mask33)); // v = (v + (v >> 4)) & 0x0F0F0F0F... Op = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getNode(ISD::SRL, dl, VT, Op, - DAG.getConstant(4, ShVT))), + DAG.getConstant(4, dl, ShVT))), Mask0F); // v = (v * 0x01010101...) >> (Len - 8) Op = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), - DAG.getConstant(Len - 8, ShVT)); + DAG.getConstant(Len - 8, dl, ShVT)); return Op; } @@ -2783,7 +2814,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, EVT ShVT = TLI.getShiftAmountTy(VT); unsigned len = VT.getSizeInBits(); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { - SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); + SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT); Op = DAG.getNode(ISD::OR, dl, VT, Op, DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3)); } @@ -2802,12 +2833,12 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), DAG.getNode(ISD::SUB, dl, VT, Op, - DAG.getConstant(1, VT))); + DAG.getConstant(1, dl, VT))); // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) return DAG.getNode(ISD::SUB, dl, VT, - DAG.getConstant(VT.getSizeInBits(), VT), + DAG.getConstant(VT.getSizeInBits(), dl, VT), DAG.getNode(ISD::CTLZ, dl, VT, Tmp3)); return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3); } @@ -2817,132 +2848,8 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { unsigned Opc = Node->getOpcode(); MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC; - - switch (Opc) { - default: - llvm_unreachable("Unhandled atomic intrinsic Expand!"); - case ISD::ATOMIC_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; - case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; - case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; - case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; - case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; - } - break; - case ISD::ATOMIC_CMP_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; - case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; - case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; - case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; - case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; - } - break; - case ISD::ATOMIC_LOAD_ADD: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; - case 
MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; - } - break; - case ISD::ATOMIC_LOAD_SUB: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; - } - break; - case ISD::ATOMIC_LOAD_AND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; - } - break; - case ISD::ATOMIC_LOAD_OR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; - } - break; - case ISD::ATOMIC_LOAD_XOR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; - } - break; - case ISD::ATOMIC_LOAD_NAND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; - } - break; - case ISD::ATOMIC_LOAD_MAX: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MAX_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MAX_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MAX_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MAX_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MAX_16;break; - } - break; - case ISD::ATOMIC_LOAD_UMAX: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMAX_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMAX_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMAX_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMAX_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMAX_16;break; - } - break; - case ISD::ATOMIC_LOAD_MIN: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MIN_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MIN_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MIN_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MIN_8; break; - 
case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MIN_16;break; - } - break; - case ISD::ATOMIC_LOAD_UMIN: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMIN_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMIN_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMIN_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMIN_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMIN_16;break; - } - break; - } + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); return ExpandChainLibCall(LC, Node, false); } @@ -2967,10 +2874,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::FRAMEADDR: case ISD::RETURNADDR: case ISD::FRAME_TO_ARGS_OFFSET: - Results.push_back(DAG.getConstant(0, Node->getValueType(0))); + Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); break; case ISD::FLT_ROUNDS_: - Results.push_back(DAG.getConstant(1, Node->getValueType(0))); + Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0))); break; case ISD::EH_RETURN: case ISD::EH_LABEL: @@ -2984,7 +2891,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::EH_SJLJ_SETJMP: // If the target didn't expand this, just return 'zero' and preserve the // chain. - Results.push_back(DAG.getConstant(0, MVT::i32)); + Results.push_back(DAG.getConstant(0, dl, MVT::i32)); Results.push_back(Node->getOperand(0)); break; case ISD::ATOMIC_FENCE: { @@ -3005,7 +2912,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::ATOMIC_LOAD: { // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. - SDValue Zero = DAG.getConstant(0, Node->getValueType(0)); + SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0)); SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); SDValue Swap = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, @@ -3081,10 +2988,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::UNDEF: { EVT VT = Node->getValueType(0); if (VT.isInteger()) - Results.push_back(DAG.getConstant(0, VT)); + Results.push_back(DAG.getConstant(0, dl, VT)); else { assert(VT.isFloatingPoint() && "Unknown value type!"); - Results.push_back(DAG.getConstantFP(0, VT)); + Results.push_back(DAG.getConstantFP(0, dl, VT)); } break; } @@ -3123,7 +3030,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { ShiftAmountTy = VT; unsigned BitsDiff = VT.getScalarType().getSizeInBits() - ExtraVT.getScalarType().getSizeInBits(); - SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy); + SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy); Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), Node->getOperand(0), ShiftCst); Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst); @@ -3161,7 +3068,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { APInt::getNullValue(VT.getSizeInBits())); APInt x = APInt::getSignBit(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); - Tmp1 = DAG.getConstantFP(apf, VT); + Tmp1 = DAG.getConstantFP(apf, dl, VT); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT), Node->getOperand(0), Tmp1, ISD::SETLT); @@ -3170,7 +3077,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getNode(ISD::FSUB, dl, VT, Node->getOperand(0), Tmp1)); False = DAG.getNode(ISD::XOR, dl, NVT, False, - DAG.getConstant(x, NVT)); + DAG.getConstant(x, dl, NVT)); Tmp1 
= DAG.getSelect(dl, NVT, Tmp2, True, False); Results.push_back(Tmp1); break; @@ -3191,11 +3098,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - DAG.getConstant(Align - 1, + DAG.getConstant(Align - 1, dl, VAList.getValueType())); VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList, - DAG.getConstant(-(int64_t)Align, + DAG.getConstant(-(int64_t)Align, dl, VAList.getValueType())); } @@ -3203,6 +3110,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, DAG.getConstant(TLI.getDataLayout()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), + dl, VAList.getValueType())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, @@ -3317,11 +3225,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (Idx < NumElems) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, - DAG.getConstant(Idx, TLI.getVectorIdxTy()))); + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy()))); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, - DAG.getConstant(Idx - NumElems, + DAG.getConstant(Idx - NumElems, dl, TLI.getVectorIdxTy()))); } @@ -3336,7 +3244,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { // 1 -> Hi Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), - DAG.getConstant(OpTy.getSizeInBits()/2, + DAG.getConstant(OpTy.getSizeInBits()/2, dl, TLI.getShiftAmountTy(Node->getOperand(0).getValueType()))); Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); } else { @@ -3374,7 +3282,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; case ISD::FNEG: // Expand Y = FNEG(X) -> Y = SUB -0.0, X - Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0)); + Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0)); Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, Node->getOperand(0)); Results.push_back(Tmp1); @@ -3383,7 +3291,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); - Tmp2 = DAG.getConstantFP(0.0, VT); + Tmp2 = DAG.getConstantFP(0.0, dl, VT); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, ISD::SETUGT); Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); @@ -3391,6 +3299,26 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: { + // Expand Y = MAX(A, B) -> Y = (A > B) ? 
A : B + ISD::CondCode Pred; + switch (Node->getOpcode()) { + default: llvm_unreachable("How did we get here?"); + case ISD::SMAX: Pred = ISD::SETGT; break; + case ISD::SMIN: Pred = ISD::SETLT; break; + case ISD::UMAX: Pred = ISD::SETUGT; break; + case ISD::UMIN: Pred = ISD::SETULT; break; + } + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp1, Tmp2, Pred); + Results.push_back(Tmp1); + break; + } + case ISD::FMINNUM: Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, RTLIB::FMIN_F80, RTLIB::FMIN_F128, @@ -3519,6 +3447,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128)); break; + case ISD::FMAD: + llvm_unreachable("Illegal fmad should never be formed"); + case ISD::FADD: Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, RTLIB::ADD_F80, RTLIB::ADD_F128, @@ -3545,6 +3476,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::FP_TO_FP16: { + if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { + SDValue Op = Node->getOperand(0); + MVT SVT = Op.getSimpleValueType(); + if ((SVT == MVT::f64 || SVT == MVT::f80) && + TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) { + // Under fastmath, we can expand this node into a fround followed by + // a float-half conversion. + SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, + DAG.getIntPtrConstant(0, dl)); + Results.push_back( + DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal)); + break; + } + } + RTLIB::Libcall LC = RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); @@ -3579,8 +3525,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.isOperationLegalOrCustom(ISD::XOR, VT) && "Don't know how to expand this subtraction!"); Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1), - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT)); - Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT)); + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, + VT)); + Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT)); Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1)); break; } @@ -3696,7 +3643,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) { Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi); - SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), + SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), dl, TLI.getShiftAmountTy(HalfType)); Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); @@ -3721,7 +3668,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT ResultType = Node->getValueType(1); EVT OType = getSetCCResultType(Node->getValueType(0)); - SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); // LHSSign -> LHS >= 0 // RHSSign -> RHS >= 0 @@ -3787,9 +3734,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, 
dl)); } else { // We can fall back to a libcall with an illegal type for the MUL if we // have a libcall big enough. @@ -3810,9 +3757,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // part. unsigned LoSize = VT.getSizeInBits(); SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS, - DAG.getConstant(LoSize-1, TLI.getPointerTy())); + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy())); SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS, - DAG.getConstant(LoSize-1, TLI.getPointerTy())); + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy())); // Here we're passing the 2 arguments explicitly as 4 arguments that are // pre-lowered to the correct types. This all depends upon WideVT not @@ -3821,9 +3770,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, dl)); // Ret is a node with an illegal type. Because such things are not // generally permitted during this phase of legalization, make sure the // node has no more uses. The above EXTRACT_ELEMENT nodes should have been @@ -3833,14 +3782,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } if (isSigned) { - Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, + Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, dl, TLI.getShiftAmountTy(BottomHalf.getValueType())); Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1, ISD::SETNE); } else { TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, - DAG.getConstant(0, VT), ISD::SETNE); + DAG.getConstant(0, dl, VT), ISD::SETNE); } Results.push_back(BottomHalf); Results.push_back(TopHalf); @@ -3851,7 +3800,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, - DAG.getConstant(PairTy.getSizeInBits()/2, + DAG.getConstant(PairTy.getSizeInBits()/2, dl, TLI.getShiftAmountTy(PairTy))); Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); break; @@ -3866,7 +3815,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { cast<CondCodeSDNode>(Tmp1.getOperand(2))->get()); } else { Tmp1 = DAG.getSelectCC(dl, Tmp1, - DAG.getConstant(0, Tmp1.getValueType()), + DAG.getConstant(0, dl, Tmp1.getValueType()), Tmp2, Tmp3, ISD::SETNE); } Results.push_back(Tmp1); @@ -3882,8 +3831,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); - Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), - Index, DAG.getConstant(EntrySize, Index.getValueType())); + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, + DAG.getConstant(EntrySize, dl, Index.getValueType())); SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, Table); @@ -3917,10 +3866,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // We test only the i1 bit. Skip the AND if UNDEF. Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? 
Tmp2 : DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, - DAG.getConstant(1, Tmp2.getValueType())); + DAG.getConstant(1, dl, Tmp2.getValueType())); Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, DAG.getCondCode(ISD::SETNE), Tmp3, - DAG.getConstant(0, Tmp3.getValueType()), + DAG.getConstant(0, dl, Tmp3.getValueType()), Node->getOperand(2)); } Results.push_back(Tmp1); @@ -3962,7 +3911,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, - DAG.getConstant(TrueValue, VT), DAG.getConstant(0, VT), + DAG.getConstant(TrueValue, dl, VT), + DAG.getConstant(0, dl, VT), Tmp3); Results.push_back(Tmp1); break; @@ -4030,7 +3980,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Tmp4, CC); } else { - Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); + Tmp2 = DAG.getConstant(0, dl, Tmp1.getValueType()); CC = DAG.getCondCode(ISD::SETNE); Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Tmp4, CC); @@ -4061,7 +4011,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, Tmp3, Node->getOperand(4)); } else { - Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); + Tmp3 = DAG.getConstant(0, dl, Tmp2.getValueType()); Tmp4 = DAG.getCondCode(ISD::SETNE); Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, Tmp3, Node->getOperand(4)); @@ -4085,12 +4035,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), - Node->getOperand(0), DAG.getConstant(Idx, - TLI.getVectorIdxTy())); + Node->getOperand(0), + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), - Node->getOperand(1), DAG.getConstant(Idx, - TLI.getVectorIdxTy())); + Node->getOperand(1), + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, VT.getScalarType(), Ex, Sh)); } @@ -4125,6 +4075,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::SETCC) { OVT = Node->getOperand(0).getSimpleValueType(); } + if (Node->getOpcode() == ISD::BR_CC) + OVT = Node->getOperand(2).getSimpleValueType(); MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3; @@ -4142,16 +4094,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { if (Node->getOpcode() == ISD::CTTZ) { // FIXME: This should set a bit in the zero extended value instead. 
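// [Editor's annotation, not part of the diff:] After the zero-extension
// above, a zero input makes the promoted CTTZ return NVT.getSizeInBits()
// (e.g. 32 for an i16 op promoted to i32); the setcc/select below maps
// that back to OVT.getSizeInBits() (16), the value the original-width
// CTTZ would have produced. ORing a single bit into the zero-extended
// value at position OVT.getSizeInBits(), as the FIXME suggests, would
// fold this fixup into one instruction.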
Tmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), - Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), + Tmp1, DAG.getConstant(NVT.getSizeInBits(), dl, NVT), ISD::SETEQ); Tmp1 = DAG.getSelect(dl, NVT, Tmp2, - DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); + DAG.getConstant(OVT.getSizeInBits(), dl, NVT), Tmp1); } else if (Node->getOpcode() == ISD::CTLZ || Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, DAG.getConstant(NVT.getSizeInBits() - - OVT.getSizeInBits(), NVT)); + OVT.getSizeInBits(), dl, NVT)); } Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); break; @@ -4160,7 +4112,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1, - DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT))); + DAG.getConstant(DiffBits, dl, + TLI.getShiftAmountTy(NVT))); Results.push_back(Tmp1); break; } @@ -4250,7 +4203,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); else Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); Results.push_back(Tmp1); break; } @@ -4280,27 +4233,74 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::BR_CC: { + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(1))->get(); + ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + } + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3)); + Results.push_back(DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), + Node->getOperand(0), Node->getOperand(1), + Tmp1, Tmp2, Node->getOperand(4))); + break; + } case ISD::FADD: case ISD::FSUB: case ISD::FMUL: case ISD::FDIV: case ISD::FREM: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FCOPYSIGN: case ISD::FPOW: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp3, DAG.getIntPtrConstant(0))); + Tmp3, DAG.getIntPtrConstant(0, dl))); break; } - case ISD::FLOG2: - case ISD::FEXP2: + case ISD::FMA: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2)); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, + DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3), + DAG.getIntPtrConstant(0, dl))); + break; + } + case ISD::FPOWI: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = Node->getOperand(1); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(0, dl))); + break; + } + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FROUND: + case ISD::FTRUNC: + case ISD::FNEG: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: case ISD::FLOG: - case ISD::FEXP: { + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FABS: + case ISD::FEXP: + case ISD::FEXP2: { Tmp1 = 
DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp2, DAG.getIntPtrConstant(0))); + Tmp2, DAG.getIntPtrConstant(0, dl))); break; } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b596715..37fdf44 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -131,7 +131,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { - return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), + return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), SDLoc(N), TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); } @@ -149,8 +149,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { // Mask = ~(1 << (Size-1)) APInt API = APInt::getAllOnesValue(Size); - API.clearBit(Size-1); - SDValue Mask = DAG.getConstant(API, NVT); + API.clearBit(Size - 1); + SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask); } @@ -218,8 +218,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { unsigned RSize = RVT.getSizeInBits(); // First get the sign bit of the second operand. - SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT), - DAG.getConstant(RSize - 1, + SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT), + DAG.getConstant(RSize - 1, dl, TLI.getShiftAmountTy(RVT))); SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); @@ -227,21 +227,21 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); if (SizeDiff > 0) { SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit, - DAG.getConstant(SizeDiff, + DAG.getConstant(SizeDiff, dl, TLI.getShiftAmountTy(SignBit.getValueType()))); SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); } else if (SizeDiff < 0) { SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit, - DAG.getConstant(-SizeDiff, + DAG.getConstant(-SizeDiff, dl, TLI.getShiftAmountTy(SignBit.getValueType()))); } // Clear the sign bit of the first operand. - SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT), - DAG.getConstant(LSize - 1, + SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT), + DAG.getConstant(LSize - 1, dl, TLI.getShiftAmountTy(LVT))); - Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT)); + Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT)); LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); // Or the value with the sign bit.
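[Editor's note] The SoftenFloatRes_FCOPYSIGN lowering above is plain sign-bit arithmetic on the integer images of the two operands. A minimal standalone sketch of the equal-width case (hypothetical helper operating on 32-bit IEEE-754 bit patterns; not part of this commit):

    #include <cstdint>

    // Combine the magnitude bits of X with the sign bit of Y, mirroring the
    // SignBit/Mask construction above for LSize == RSize == 32.
    uint32_t softCopySign32(uint32_t X, uint32_t Y) {
      uint32_t SignBit = Y & (1u << 31); // sign of the second operand
      uint32_t Mask = (1u << 31) - 1;    // clears the sign of the first
      return (X & Mask) | SignBit;
    }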
@@ -386,8 +386,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); // Expand Y = FNEG(X) -> Y = SUB -0.0, X - SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)), + SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), GetSoftenedFloat(N->getOperand(0)) }; return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, @@ -395,7 +396,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, 2, false, dl).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -755,7 +756,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -794,7 +795,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -837,7 +838,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { if (ST->isTruncatingStore()) // Do an FP_ROUND followed by a non-truncating store. Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), - Val, DAG.getIntPtrConstant(0))); + Val, DAG.getIntPtrConstant(0, dl))); else Val = GetSoftenedFloat(Val); @@ -927,12 +928,13 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, assert(NVT.getSizeInBits() == integerPartWidth && "Do not know how to expand this float constant!"); APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt(); + SDLoc dl(N); Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), APInt(integerPartWidth, C.getRawData()[1])), - NVT); + dl, NVT); Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), APInt(integerPartWidth, C.getRawData()[0])), - NVT); + dl, NVT); } void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, @@ -1136,9 +1138,10 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - Hi = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), NVT, N->getOperand(0)); + SDLoc dl(N); + Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0)); Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), - APInt(NVT.getSizeInBits(), 0)), NVT); + APInt(NVT.getSizeInBits(), 0)), dl, NVT); } void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, @@ -1262,7 +1265,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, // The low part is zero. Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), - APInt(NVT.getSizeInBits(), 0)), NVT); + APInt(NVT.getSizeInBits(), 0)), dl, NVT); // Modified the chain - switch anything that used the old chain to use the // new one. 
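[Editor's note] ExpandFloatRes_FP_EXTEND and ExpandFloatRes_LOAD above both materialize a zero low part because ppcf128 is a "double-double" format: the 128-bit value is the exact sum Hi + Lo of two f64 components, with Hi holding the value rounded to double precision. A hypothetical standalone model (not part of this commit):

    // A double that is merely widened to ppcf128 is already exactly
    // representable, so its low component is 0.0.
    struct PPCF128 { double Hi, Lo; };
    PPCF128 extendToPPCF128(double D) { return {D, 0.0}; }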
@@ -1287,7 +1290,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, MVT::i32, Src); Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), - APInt(NVT.getSizeInBits(), 0)), NVT); + APInt(NVT.getSizeInBits(), 0)), dl, NVT); Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src); } else { RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1335,8 +1338,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, APInt(128, Parts)), - MVT::ppcf128)); - Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, SrcVT), + dl, MVT::ppcf128)); + Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT), Lo, Hi, ISD::SETLT); GetPairElements(Lo, Lo, Hi); } @@ -1436,7 +1439,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -1479,7 +1482,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128, N->getOperand(0), DAG.getValueType(MVT::f64)); Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res, - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, dl)); return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); } @@ -1499,7 +1502,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { "Logic only correct for ppcf128!"); const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31)); - SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); + SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. return DAG.getSelectCC(dl, N->getOperand(0), Tmp, @@ -1509,7 +1512,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { MVT::ppcf128, N->getOperand(0), Tmp)), - DAG.getConstant(0x80000000, MVT::i32)), + DAG.getConstant(0x80000000, dl, + MVT::i32)), DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, N->getOperand(0)), ISD::SETGE); @@ -1529,7 +1533,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. 
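// [Editor's annotation, not part of the diff:] The ppcf128 FP_TO_UINT
// expansion a few hunks above is the classic signed-conversion trick; a
// hypothetical standalone model of the selected expression:
//
//   uint32_t FPToUInt32(double X) {
//     const double TwoE31 = 2147483648.0;      // 2^31
//     if (X >= TwoE31)                         // above INT32_MAX
//       return (uint32_t)(int32_t)(X - TwoE31) + 0x80000000u;
//     return (uint32_t)(int32_t)X;             // fits in a signed int
//   }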
if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -1579,3 +1583,420 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr, ST->getMemoryVT(), ST->getMemOperand()); } + +//===----------------------------------------------------------------------===// +// Float Operand Promotion +//===----------------------------------------------------------------------===// +// + +static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f16) { + return ISD::FP16_TO_FP; + } else if (RetVT == MVT::f16) { + return ISD::FP_TO_FP16; + } + + report_fatal_error("Attempt at an invalid promotion-related conversion"); +} + +bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { + SDValue R = SDValue(); + + // Nodes that use a promotion-requiring floating point operand, but don't + // produce a promotion-requiring floating point result, need to be legalized + // to use the promoted float operand. Nodes that produce at least one + // promotion-requiring floating point result have their operands legalized as + // a part of PromoteFloatResult. + switch (N->getOpcode()) { + default: + llvm_unreachable("Do not know how to promote this operator's operand!"); + + case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break; + case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break; + case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break; + case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break; + case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break; + case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break; + } + + if (R.getNode()) + ReplaceValueWith(SDValue(N, 0), R); + return false; +} + +SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) { + SDValue Op = N->getOperand(0); + EVT OpVT = Op->getValueType(0); + + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + assert (IVT == N->getValueType(0) && "Bitcast to type of different size"); + + SDValue Promoted = GetPromotedFloat(N->getOperand(0)); + EVT PromotedVT = Promoted->getValueType(0); + + // Convert the promoted float value to the desired IVT. + return DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N), IVT, + Promoted); + } + +// Promote Operand 1 of FCOPYSIGN. Operand 0 ought to be handled by +// PromoteFloatRes_FCOPYSIGN. +SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) { + assert (OpNo == 1 && "Only Operand 1 must need promotion here"); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + N->getOperand(0), Op1); +} + +// Convert the promoted float value to the desired integer type +SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) { + SDValue Op = GetPromotedFloat(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) { + SDValue Op = GetPromotedFloat(N->getOperand(0)); + EVT VT = N->getValueType(0); + + // Desired VT is the same as the promoted type. Use the promoted float directly. + if (VT == Op->getValueType(0)) + return Op; + + // Else, extend the promoted float value to the desired VT.
+ return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op); +} + +// Promote the float operands used for comparison. The true- and false- +// operands have the same type as the result and are promoted, if needed, by +// PromoteFloatRes_SELECT_CC. +SDValue DAGTypeLegalizer::PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo) { + SDValue LHS = GetPromotedFloat(N->getOperand(0)); + SDValue RHS = GetPromotedFloat(N->getOperand(1)); + + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), + LHS, RHS, N->getOperand(2), N->getOperand(3), + N->getOperand(4)); +} + +// Construct a SETCC that compares the promoted values and sets the conditional +// code. +SDValue DAGTypeLegalizer::PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + + return DAG.getSetCC(SDLoc(N), NVT, Op0, Op1, CCCode); + +} + +// Lower the promoted float down to the integer value of the same size and +// construct a STORE of the integer value. +SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Val = ST->getValue(); + SDLoc DL(N); + + SDValue Promoted = GetPromotedFloat(Val); + EVT VT = ST->getOperand(1)->getValueType(0); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + + SDValue NewVal; + NewVal = DAG.getNode(GetPromotionOpcode(Promoted.getValueType(), VT), DL, + IVT, Promoted); + + return DAG.getStore(ST->getChain(), DL, NewVal, ST->getBasePtr(), + ST->getMemOperand()); +} + +//===----------------------------------------------------------------------===// +// Float Result Promotion +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { + SDValue R = SDValue(); + + switch (N->getOpcode()) { + // These opcodes cannot appear if promotion of FP16 is done in the backend + // instead of in Clang + case ISD::FP16_TO_FP: + case ISD::FP_TO_FP16: + default: + llvm_unreachable("Do not know how to promote this operator's result!"); + + case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break; + case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break; + case ISD::EXTRACT_VECTOR_ELT: + R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::FCOPYSIGN: R = PromoteFloatRes_FCOPYSIGN(N); break; + + // Unary FP Operations + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: R = PromoteFloatRes_UnaryOp(N); break; + + // Binary FP Operations + case ISD::FADD: + case ISD::FDIV: + case ISD::FMAXNUM: + case ISD::FMINNUM: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: R = PromoteFloatRes_BinOp(N); break; + + case ISD::FMA: // FMA is same as FMAD + case ISD::FMAD: R = PromoteFloatRes_FMAD(N); break; + + case ISD::FPOWI: R = PromoteFloatRes_FPOWI(N); break; + + case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break; + case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break; + case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break; + + case
ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break; + case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break; + + } + + if (R.getNode()) + SetPromotedFloat(SDValue(N, ResNo), R); +} + +// Bitcast from i16 to f16: convert the i16 to an f32 value instead. +// At this point, it is not possible to determine if the bitcast value is +// eventually stored to memory or promoted to f32 or promoted to a +// higher-precision floating point. Some of these cases are handled by the +// FP_EXTEND and STORE promotion handlers. +SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, + N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) { + ConstantFPSDNode *CFPNode = cast<ConstantFPSDNode>(N); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // Get the (bit-cast) APInt of the APFloat and build an integer constant + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + SDValue C = DAG.getConstant(CFPNode->getValueAPF().bitcastToAPInt(), DL, + IVT); + + // Convert the Constant to the desired FP type + // FIXME: We might be able to do the conversion during compilation and get rid + // of it from the object code + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, C); +} + +// If the Index operand is a constant, try to redirect the extract operation to +// the correct legalized vector. If not, bit-convert the input vector to an +// equivalent integer vector. Extract the element as a (bit-cast) integer +// value and convert it to the promoted type. +SDValue DAGTypeLegalizer::PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { + SDLoc DL(N); + + // If the index is constant, try to extract the value from the legalized + // vector type.
+ if (isa<ConstantSDNode>(N->getOperand(1))) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + EVT VecVT = Vec->getValueType(0); + EVT EltVT = VecVT.getVectorElementType(); + + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + switch (getTypeAction(VecVT)) { + default: break; + case TargetLowering::TypeScalarizeVector: { + SDValue Res = GetScalarizedVector(N->getOperand(0)); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); + } + case TargetLowering::TypeWidenVector: { + Vec = GetWidenedVector(Vec); + SDValue Res = DAG.getNode(N->getOpcode(), DL, EltVT, Vec, Idx); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); + } + case TargetLowering::TypeSplitVector: { + SDValue Lo, Hi; + GetSplitVector(Vec, Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + SDValue Res; + if (IdxVal < LoElts) + Res = DAG.getNode(N->getOpcode(), DL, EltVT, Lo, Idx); + else + Res = DAG.getNode(N->getOpcode(), DL, EltVT, Hi, + DAG.getConstant(IdxVal - LoElts, DL, + Idx.getValueType())); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); + } + + } + } + + // Bit-convert the input vector to the equivalent integer vector + SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); + EVT IVT = NewOp.getValueType().getVectorElementType(); + + // Extract the element as a (bit-cast) integer value + SDValue NewVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IVT, + NewOp, N->getOperand(1)); + + // Convert the element to the desired FP type + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, NewVal); +} + +// FCOPYSIGN(X, Y) returns the value of X with the sign of Y. If the result +// needs promotion, so does the argument X. Note that Y, if needed, will be +// handled during operand promotion. +SDValue DAGTypeLegalizer::PromoteFloatRes_FCOPYSIGN(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + + SDValue Op1 = N->getOperand(1); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); +} + +// Unary operation where the result and the operand have PromoteFloat type +// action. Construct a new SDNode with the promoted float value of the old +// operand. +SDValue DAGTypeLegalizer::PromoteFloatRes_UnaryOp(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op = GetPromotedFloat(N->getOperand(0)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op); +} + +// Binary operations where the result and both operands have PromoteFloat type +// action. Construct a new SDNode with the promoted float values of the old +// operands.
+SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + SDValue Op2 = GetPromotedFloat(N->getOperand(2)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, Op2); +} + +// Promote the Float (first) operand and retain the Integer (second) operand +SDValue DAGTypeLegalizer::PromoteFloatRes_FPOWI(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = N->getOperand(1); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); +} + +// Explicit operation to reduce precision. Reduce the value to half precision +// and promote it back to the legal type. +SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) { + SDLoc DL(N); + + SDValue Op = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT OpVT = Op->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + + // Round promoted float to desired precision + SDValue Round = DAG.getNode(GetPromotionOpcode(OpVT, VT), DL, IVT, Op); + // Promote it back to the legal output type + return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, Round); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { + LoadSDNode *L = cast<LoadSDNode>(N); + EVT VT = N->getValueType(0); + + // Load the value as an integer value with the same number of bits + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), + IVT, SDLoc(N), L->getChain(), L->getBasePtr(), + L->getOffset(), L->getPointerInfo(), IVT, L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment(), + L->getAAInfo()); + // Legalize the chain result by replacing uses of the old value chain with the + // new one + ReplaceValueWith(SDValue(N, 1), newL.getValue(1)); + + // Convert the integer value to the desired FP type + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, newL); +} + +// Construct a new SELECT node with the promoted true- and false- values. +SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) { + SDValue TrueVal = GetPromotedFloat(N->getOperand(1)); + SDValue FalseVal = GetPromotedFloat(N->getOperand(2)); + + return DAG.getNode(ISD::SELECT, SDLoc(N), TrueVal->getValueType(0), + N->getOperand(0), TrueVal, FalseVal); +} + +// Construct a new SELECT_CC node with the promoted true- and false- values. +// The operands used for comparison are promoted by PromoteFloatOp_SELECT_CC. 
+SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) { + SDValue TrueVal = GetPromotedFloat(N->getOperand(2)); + SDValue FalseVal = GetPromotedFloat(N->getOperand(3)); + + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), + N->getOperand(0), N->getOperand(1), TrueVal, FalseVal, + N->getOperand(4)); +} + +// Construct a SDNode that transforms the SINT or UINT operand to the promoted +// float type. +SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); +} + diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index a4e44cc..eeaebf78 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -71,6 +71,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: Res = PromoteIntRes_SimpleIntBinOp(N); break; case ISD::SHL: Res = PromoteIntRes_SHL(N); break; case ISD::SIGN_EXTEND_INREG: Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; @@ -251,6 +255,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { case TargetLowering::TypeSoftenFloat: // Promote the integer operand by hand. return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); + case TargetLowering::TypePromoteFloat: { + // Convert the promoted float by hand. + if (NOutVT.bitsEq(NInVT)) { + SDValue PromotedOp = GetPromotedFloat(InOp); + SDValue Trunc = DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp); + return DAG.getNode(ISD::AssertZext, dl, NOutVT, Trunc, + DAG.getValueType(OutVT)); + } + break; + } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: break; @@ -297,7 +311,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), - DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT))); + DAG.getConstant(DiffBits, dl, TLI.getShiftAmountTy(NVT))); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { @@ -345,7 +359,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { // Subtract off the extra leading bits in the bigger type. return DAG.getNode( ISD::SUB, dl, NVT, Op, - DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), + DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT)); } @@ -366,7 +380,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { // the top of the original type. 
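// [Editor's annotation, not part of the diff:] Concretely: when an i16
// CTTZ is promoted to i32, ORing in bit 16 makes a zero i16 input yield
// cttz == 16 (the original width) instead of 32, while nonzero inputs are
// unaffected because their lowest set bit lies below bit 16.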
auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(), OVT.getScalarSizeInBits()); - Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT)); } return DAG.getNode(N->getOpcode(), dl, NVT, Op); } @@ -723,9 +737,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { if (N->getOpcode() == ISD::UMULO) { // Unsigned overflow occurred if the high part is non-zero. SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, - DAG.getIntPtrConstant(SmallVT.getSizeInBits())); + DAG.getIntPtrConstant(SmallVT.getSizeInBits(), + DL)); Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, - DAG.getConstant(0, Hi.getValueType()), ISD::SETNE); + DAG.getConstant(0, DL, Hi.getValueType()), + ISD::SETNE); } else { // Signed overflow occurred if the high part does not sign extend the low. SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(), @@ -784,7 +800,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); // Shift it to the right position and "or" it in. Part = DAG.getNode(ISD::SHL, dl, NVT, Part, - DAG.getConstant(i * RegVT.getSizeInBits(), + DAG.getConstant(i*RegVT.getSizeInBits(), dl, TLI.getPointerTy())); Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part); } @@ -852,6 +868,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break; case ISD::SHL: case ISD::SRA: @@ -977,7 +994,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { SDLoc dl(N); Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, - DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy())); + DAG.getConstant(OVT.getSizeInBits(), dl, + TLI.getPointerTy())); return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi); } @@ -1116,7 +1134,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){ - assert(OpNo == 2 && "Only know how to promote the mask!"); SDValue DataOp = N->getValue(); EVT DataVT = DataOp.getValueType(); SDValue Mask = N->getMask(); @@ -1127,7 +1144,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN if (!TLI.isTypeLegal(DataVT)) { if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) { DataOp = GetPromotedInteger(DataOp); - Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); + if (!TLI.isTypeLegal(MaskVT)) + Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); TruncateStore = true; } else { @@ -1147,7 +1165,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN unsigned NumConcat = WidenNumElts / MaskNumElts; SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, MaskVT); + SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); Ops[0] = Mask; for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = ZeroVal; @@ -1323,92 +1341,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { unsigned Opc = Node->getOpcode(); MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC; - - switch (Opc) { - default: - 
llvm_unreachable("Unhandled atomic intrinsic Expand!"); - case ISD::ATOMIC_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; - case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; - case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; - case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; - case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; - } - break; - case ISD::ATOMIC_CMP_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; - case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; - case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; - case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; - case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; - } - break; - case ISD::ATOMIC_LOAD_ADD: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; - } - break; - case ISD::ATOMIC_LOAD_SUB: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; - } - break; - case ISD::ATOMIC_LOAD_AND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; - } - break; - case ISD::ATOMIC_LOAD_OR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; - } - break; - case ISD::ATOMIC_LOAD_XOR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; - } - break; - case ISD::ATOMIC_LOAD_NAND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; - } - break; - } + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != 
RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); return ExpandChainLibCall(LC, Node, false); } @@ -1417,12 +1351,19 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { /// and the shift amount is a constant 'Amt'. Expand the operation. void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi) { - assert(Amt && "Expected zero shifts to be already optimized away."); SDLoc DL(N); // Expand the incoming operand to be shifted, so that we have its parts SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); + // Though Amt shouldn't usually be 0, it's possible. E.g. when legalization + // split a vector shift, like this: <op1, op2> SHL <0, 2>. + if (!Amt) { + Lo = InL; + Hi = InH; + return; + } + EVT NVT = InL.getValueType(); unsigned VTBits = N->getValueType(0).getSizeInBits(); unsigned NVTBits = NVT.getSizeInBits(); @@ -1430,13 +1371,13 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, if (N->getOpcode() == ISD::SHL) { if (Amt > VTBits) { - Lo = Hi = DAG.getConstant(0, NVT); + Lo = Hi = DAG.getConstant(0, DL, NVT); } else if (Amt > NVTBits) { - Lo = DAG.getConstant(0, NVT); + Lo = DAG.getConstant(0, DL, NVT); Hi = DAG.getNode(ISD::SHL, DL, - NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy)); + NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy)); } else if (Amt == NVTBits) { - Lo = DAG.getConstant(0, NVT); + Lo = DAG.getConstant(0, DL, NVT); Hi = InL; } else if (Amt == 1 && TLI.isOperationLegalOrCustom(ISD::ADDC, @@ -1448,34 +1389,34 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps); } else { - Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)); + Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy)); Hi = DAG.getNode(ISD::OR, DL, NVT, DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(Amt, ShTy)), + DAG.getConstant(Amt, DL, ShTy)), DAG.getNode(ISD::SRL, DL, NVT, InL, - DAG.getConstant(NVTBits-Amt, ShTy))); + DAG.getConstant(NVTBits - Amt, DL, ShTy))); } return; } if (N->getOpcode() == ISD::SRL) { if (Amt > VTBits) { - Lo = DAG.getConstant(0, NVT); - Hi = DAG.getConstant(0, NVT); + Lo = DAG.getConstant(0, DL, NVT); + Hi = DAG.getConstant(0, DL, NVT); } else if (Amt > NVTBits) { Lo = DAG.getNode(ISD::SRL, DL, - NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy)); - Hi = DAG.getConstant(0, NVT); + NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy)); + Hi = DAG.getConstant(0, DL, NVT); } else if (Amt == NVTBits) { Lo = InH; - Hi = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, DL, NVT); } else { Lo = DAG.getNode(ISD::OR, DL, NVT, DAG.getNode(ISD::SRL, DL, NVT, InL, - DAG.getConstant(Amt, ShTy)), + DAG.getConstant(Amt, DL, ShTy)), DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(NVTBits-Amt, ShTy))); - Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); + DAG.getConstant(NVTBits - Amt, DL, ShTy))); + Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)); } return; } @@ -1483,23 +1424,23 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); if (Amt > VTBits) { Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, DL, ShTy)); } else if (Amt > NVTBits) { Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(Amt-NVTBits, ShTy)); +
DAG.getConstant(Amt-NVTBits, DL, ShTy)); Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, DL, ShTy)); } else if (Amt == NVTBits) { Lo = InH; Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, DL, ShTy)); } else { Lo = DAG.getNode(ISD::OR, DL, NVT, DAG.getNode(ISD::SRL, DL, NVT, InL, - DAG.getConstant(Amt, ShTy)), + DAG.getConstant(Amt, DL, ShTy)), DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(NVTBits-Amt, ShTy))); - Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); + DAG.getConstant(NVTBits - Amt, DL, ShTy))); + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)); } } @@ -1535,21 +1476,21 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { if (KnownOne.intersects(HighBitMask)) { // Mask out the high bit, which we know is set. Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, - DAG.getConstant(~HighBitMask, ShTy)); + DAG.getConstant(~HighBitMask, dl, ShTy)); switch (N->getOpcode()) { default: llvm_unreachable("Unknown shift"); case ISD::SHL: - Lo = DAG.getConstant(0, NVT); // Low part is zero. + Lo = DAG.getConstant(0, dl, NVT); // Low part is zero. Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. return true; case ISD::SRL: - Hi = DAG.getConstant(0, NVT); // Hi part is zero. + Hi = DAG.getConstant(0, dl, NVT); // Hi part is zero. Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. return true; case ISD::SRA: Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, dl, ShTy)); Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. return true; } @@ -1562,7 +1503,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { // shift if x is zero. We can use XOR here because x is known to be smaller // than 32. SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt, - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, dl, ShTy)); unsigned Op1, Op2; switch (N->getOpcode()) { @@ -1578,7 +1519,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { // Use a little trick to get the bits that move from Lo to Hi. First // shift by one bit. - SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy)); + SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, dl, ShTy)); // Then compute the remaining shift with amount-1. 
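// [Editor's annotation, not part of the diff:] Sh1 and Sh2 together
// compute InL shifted by (NVTBits - Amt): a shift by 1 followed by a
// shift by Amt2 == NVTBits - 1 - Amt. Splitting it this way keeps every
// shift amount strictly below NVTBits, so the sequence stays well defined
// even when Amt == 0, where a single shift by NVTBits would be undefined
// (the hazard the FIXME comments removed in the next hunk also note).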
SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2); @@ -1609,11 +1550,14 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); - SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy); + SDValue NVBitsNode = DAG.getConstant(NVTBits, dl, ShTy); SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode); SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy), Amt, NVBitsNode, ISD::SETULT); + SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(ShTy), + Amt, DAG.getConstant(0, dl, ShTy), + ISD::SETEQ); SDValue LoS, HiS, LoL, HiL; switch (N->getOpcode()) { @@ -1623,16 +1567,15 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); HiS = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(ISD::SHL, dl, NVT, InH, Amt), - // FIXME: If Amt is zero, the following shift generates an undefined result - // on some architectures. DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack)); // Long: ShAmt >= NVTBits - LoL = DAG.getConstant(0, NVT); // Lo part is zero. + LoL = DAG.getConstant(0, dl, NVT); // Lo part is zero. HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); - Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); + Hi = DAG.getSelect(dl, NVT, isZero, InH, + DAG.getSelect(dl, NVT, isShort, HiS, HiL)); return true; case ISD::SRL: // Short: ShAmt < NVTBits @@ -1644,10 +1587,11 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); // Long: ShAmt >= NVTBits - HiL = DAG.getConstant(0, NVT); // Hi part is zero. + HiL = DAG.getConstant(0, dl, NVT); // Hi part is zero. LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Lo = DAG.getSelect(dl, NVT, isZero, InL, + DAG.getSelect(dl, NVT, isShort, LoS, LoL)); Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; case ISD::SRA: @@ -1655,16 +1599,15 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); LoS = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), - // FIXME: If Amt is zero, the following shift generates an undefined result - // on some architectures. DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); // Long: ShAmt >= NVTBits HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part. - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, dl, ShTy)); LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Lo = DAG.getSelect(dl, NVT, isZero, InL, + DAG.getSelect(dl, NVT, isShort, LoS, LoL)); Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; } @@ -1706,18 +1649,50 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, return; } + bool hasOVF = + TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? 
+ ISD::UADDO : ISD::USUBO, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + if (hasOVF) { + SDVTList VTList = DAG.getVTList(NVT, NVT); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); + int RevOpc; + if (N->getOpcode() == ISD::ADD) { + RevOpc = ISD::SUB; + Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); + Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); + } else { + RevOpc = ISD::ADD; + Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); + Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); + } + SDValue OVF = Lo.getValue(1); + + switch (BoolType) { + case TargetLoweringBase::UndefinedBooleanContent: + OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF); + // Fallthrough + case TargetLoweringBase::ZeroOrOneBooleanContent: + Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF); + break; + case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: + Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF); + } + return; + } + if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps); Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); + DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1], ISD::SETULT); SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2, - DAG.getConstant(1, NVT), Carry1); + DAG.getConstant(1, dl, NVT), Carry1); Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); } else { Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps); @@ -1726,8 +1701,8 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); + DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } @@ -1824,7 +1799,7 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part replicates the sign bit of Lo, make it explicit. Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(NVTBits-1, TLI.getPointerTy())); + DAG.getConstant(NVTBits - 1, dl, TLI.getPointerTy())); } } @@ -1844,7 +1819,7 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, } else { Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part must be zero, make it explicit. 
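(The SETULT-based fallback above, used when UADDO/USUBO are not legal, is ordinary carry propagation. A rough scalar equivalent in plain C++; the function name is made up.)

#include <cstdint>

// Double-width add from halves: the wrapping low-half sum is below an addend
// exactly when it wrapped, which recovers the carry into the high half.
uint64_t add64_from_halves(uint32_t AL, uint32_t AH, uint32_t BL, uint32_t BH) {
  uint32_t Lo = AL + BL;              // may wrap
  uint32_t Carry = (Lo < AL) ? 1 : 0; // the ISD::SETULT against an addend
  uint32_t Hi = AH + BH + Carry;
  return ((uint64_t)Hi << 32) | Lo;
}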
- Hi = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, dl, NVT); } } @@ -1864,8 +1839,9 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, const APInt &Cst = Constant->getAPIntValue(); bool IsTarget = Constant->isTargetOpcode(); bool IsOpaque = Constant->isOpaque(); - Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT, IsTarget, IsOpaque); - Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT, IsTarget, + SDLoc dl(N); + Lo = DAG.getConstant(Cst.trunc(NBitWidth), dl, NVT, IsTarget, IsOpaque); + Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), dl, NVT, IsTarget, IsOpaque); } @@ -1877,15 +1853,16 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, EVT NVT = Lo.getValueType(); SDValue HiNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi, - DAG.getConstant(0, NVT), ISD::SETNE); + DAG.getConstant(0, dl, NVT), ISD::SETNE); SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ, DAG.getNode(ISD::ADD, dl, NVT, LoLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); - Hi = DAG.getConstant(0, NVT); + DAG.getConstant(NVT.getSizeInBits(), dl, + NVT))); + Hi = DAG.getConstant(0, dl, NVT); } void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, @@ -1896,7 +1873,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, EVT NVT = Lo.getValueType(); Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo), DAG.getNode(ISD::CTPOP, dl, NVT, Hi)); - Hi = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, dl, NVT); } void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, @@ -1907,22 +1884,27 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, EVT NVT = Lo.getValueType(); SDValue LoNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, - DAG.getConstant(0, NVT), ISD::SETNE); + DAG.getConstant(0, dl, NVT), ISD::SETNE); SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ, DAG.getNode(ISD::ADD, dl, NVT, HiLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); - Hi = DAG.getConstant(0, NVT); + DAG.getConstant(NVT.getSizeInBits(), dl, + NVT))); + Hi = DAG.getConstant(0, dl, NVT); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) + Op = GetPromotedFloat(Op); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, @@ -1934,7 +1916,11 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) + Op = GetPromotedFloat(Op); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, @@ -1980,10 +1966,10 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // lo part. 
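(The CTLZ/CTTZ expansions above compose the count from the two halves. Scalar form of the CTLZ case, a sketch using the GCC/Clang builtin.)

#include <cstdint>

// ctlz of a 64-bit value from 32-bit halves: count in the nonzero high half,
// or 32 plus the low half's count; CTTZ is the mirror image starting from Lo.
unsigned ctlz64_from_halves(uint32_t Lo, uint32_t Hi) {
  if (Hi != 0)
    return __builtin_clz(Hi);
  return 32 + (Lo != 0 ? __builtin_clz(Lo) : 32);
}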
unsigned LoSize = Lo.getValueType().getSizeInBits(); Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(LoSize-1, TLI.getPointerTy())); + DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy())); } else if (ExtType == ISD::ZEXTLOAD) { // The high part is just a zero. - Hi = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, dl, NVT); } else { assert(ExtType == ISD::EXTLOAD && "Unknown extload!"); // The high part is undefined. @@ -2002,7 +1988,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, isInvariant, @@ -2029,7 +2015,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); // Load the rest of the low bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -2046,12 +2032,12 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Transfer low bits from the bottom of Hi to the top of Lo. Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, DAG.getNode(ISD::SHL, dl, NVT, Hi, - DAG.getConstant(ExcessBits, + DAG.getConstant(ExcessBits, dl, TLI.getPointerTy()))); // Move high bits to the right position in Hi. Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, NVT, Hi, - DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl, TLI.getPointerTy())); } } @@ -2127,7 +2113,7 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) // EVT OType = Node->getValueType(1); - SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); @@ -2148,6 +2134,13 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(0), Lo, Hi); + return; + } RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2160,7 +2153,6 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } @@ -2276,7 +2268,7 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, // The high part is obtained by SRA'ing all but one of the bits of low part. unsigned LoSize = NVT.getSizeInBits(); Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(LoSize-1, TLI.getPointerTy())); + DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy())); } else { // For example, extension of an i48 to an i64. 
The operand type necessarily // promotes to the result type, so will end up being expanded too. @@ -2310,7 +2302,7 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { // The high part gets the sign extension from the lo-part. This handles // things like sextinreg V:i64 from i8. Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo, - DAG.getConstant(Hi.getValueType().getSizeInBits()-1, + DAG.getConstant(Hi.getValueType().getSizeInBits() - 1, dl, TLI.getPointerTy())); } else { // For example, extension of an i48 to an i64. Leave the low part alone, @@ -2327,6 +2319,13 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(1), Lo, Hi); + return; + } RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2339,7 +2338,6 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } @@ -2350,7 +2348,8 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(), N->getOperand(0), - DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy())); + DAG.getConstant(NVT.getSizeInBits(), dl, + TLI.getPointerTy())); Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); } @@ -2392,14 +2391,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, // A divide for UMULO will be faster than a function call. Select to // make sure we aren't using 0. SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT), - RHS, DAG.getConstant(0, VT), ISD::SETEQ); + RHS, DAG.getConstant(0, dl, VT), ISD::SETEQ); SDValue NotZero = DAG.getSelect(dl, VT, isZero, - DAG.getConstant(1, VT), RHS); + DAG.getConstant(1, dl, VT), RHS); SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, ISD::SETNE); Overflow = DAG.getSelect(dl, N->getValueType(1), isZero, - DAG.getConstant(0, N->getValueType(1)), + DAG.getConstant(0, dl, N->getValueType(1)), Overflow); ReplaceValueWith(SDValue(N, 1), Overflow); return; @@ -2422,7 +2421,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Temp = DAG.CreateStackTemporary(PtrVT); // Temporary for the overflow value, default it to zero. SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, - DAG.getConstant(0, PtrVT), Temp, + DAG.getConstant(0, dl, PtrVT), Temp, MachinePointerInfo(), false, false, 0); TargetLowering::ArgListTy Args; @@ -2457,7 +2456,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, MachinePointerInfo(), false, false, false, 0); SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, - DAG.getConstant(0, PtrVT), + DAG.getConstant(0, dl, PtrVT), ISD::SETNE); // Use the overflow from the libcall everywhere. 
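(The divide-back test in the XMULO expansion above is the standard unsigned-multiply overflow check. A scalar sketch; the name is illustrative, and RHS == 0 is special-cased exactly as the selects above do.)

#include <cstdint>

// Unsigned multiply with overflow detection via division: after the wrapping
// multiply, Product / RHS != LHS iff the true product did not fit. RHS == 0
// can never overflow and must not reach the UDIV.
bool umul_overflows(uint32_t LHS, uint32_t RHS, uint32_t &Product) {
  Product = LHS * RHS; // wraps on overflow
  if (RHS == 0)
    return false;
  return Product / RHS != LHS;
}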
ReplaceValueWith(SDValue(N, 1), Ofl); @@ -2467,6 +2466,13 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(0), Lo, Hi); + return; + } RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2479,7 +2485,6 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } @@ -2487,6 +2492,13 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(1), Lo, Hi); + return; + } RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2499,7 +2511,6 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } @@ -2511,7 +2522,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, if (Op.getValueType().bitsLE(NVT)) { // The low part is zero extension of the input (degenerates to a copy). Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); - Hi = DAG.getConstant(0, NVT); // The high part is just a zero. + Hi = DAG.getConstant(0, dl, NVT); // The high part is just a zero. } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. @@ -2536,7 +2547,7 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, SDLoc dl(N); EVT VT = cast<AtomicSDNode>(N)->getMemoryVT(); SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other); - SDValue Zero = DAG.getConstant(0, VT); + SDValue Zero = DAG.getConstant(0, dl, VT); SDValue Swap = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0), @@ -2637,7 +2648,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo); NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi); NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS); - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, dl, NewLHS.getValueType()); return; } @@ -2726,7 +2737,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. 
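(The XOR/OR rewrite in IntegerExpandSetCCOperands above rests on a simple identity; scalar form as a sketch.)

#include <cstdint>

// Expanded equality compare: the halves are all equal exactly when the OR of
// their XORs is zero, so a double-width SETEQ becomes one compare against 0.
bool eq64_from_halves(uint32_t LL, uint32_t LH, uint32_t RL, uint32_t RH) {
  return ((LL ^ RL) | (LH ^ RH)) == 0;
}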
if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -2744,7 +2755,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -2839,7 +2850,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, @@ -2861,11 +2872,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (ExcessBits < NVT.getSizeInBits()) { // Transfer high bits from the top of Lo to the bottom of Hi. Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, - DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl, TLI.getPointerTy())); Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, DAG.getNode(ISD::SRL, dl, NVT, Lo, - DAG.getConstant(ExcessBits, + DAG.getConstant(ExcessBits, dl, TLI.getPointerTy()))); } @@ -2875,7 +2886,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); // Store the lowest ExcessBits bits in the second half. Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -2931,7 +2942,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { GetExpandedInteger(Op, Lo, Hi); SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Hi.getValueType()), - Hi, DAG.getConstant(0, Hi.getValueType()), + Hi, + DAG.getConstant(0, dl, Hi.getValueType()), ISD::SETLT); // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. @@ -2940,8 +2952,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { TLI.getPointerTy()); // Get a pointer to FF if the sign bit was set, or to 0 otherwise. - SDValue Zero = DAG.getIntPtrConstant(0); - SDValue Four = DAG.getIntPtrConstant(4); + SDValue Zero = DAG.getIntPtrConstant(0, dl); + SDValue Four = DAG.getIntPtrConstant(4, dl); if (TLI.isBigEndian()) std::swap(Zero, Four); SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Zero, Four); @@ -2999,7 +3011,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { // Extract the element from the original vector. 
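(The constant-pool select in ExpandIntOp_UINT_TO_FP above picks between 0 and a 2^64 fudge factor depending on the sign bit of Hi. The scalar idea, as a sketch rather than the actual DAG lowering.)

#include <cstdint>

// uint64 -> double via a signed conversion plus a conditional 2^64 fixup,
// mirroring the select between offsets 0 and 4 into the {0, FF} pair.
double uint64_to_double_fixup(uint64_t X) {
  double D = (double)(int64_t)X;   // signed int-to-fp
  if ((int64_t)X < 0)              // Hi was compared SETLT 0 above
    D += 18446744073709551616.0;   // add 2^64 back
  return D;
}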
SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), - BaseIdx, DAG.getConstant(i, BaseIdx.getValueType())); + BaseIdx, DAG.getConstant(i, dl, BaseIdx.getValueType())); SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InVT.getVectorElementType(), N->getOperand(0), Index); @@ -3017,17 +3029,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); SDLoc dl(N); - unsigned NumElts = VT.getVectorNumElements(); - SmallVector<int, 8> NewMask; - for (unsigned i = 0; i != NumElts; ++i) { - NewMask.push_back(SV->getMaskElt(i)); - } + ArrayRef<int> NewMask = SV->getMask().slice(0, VT.getVectorNumElements()); SDValue V0 = GetPromotedInteger(N->getOperand(0)); SDValue V1 = GetPromotedInteger(N->getOperand(1)); EVT OutVT = V0.getValueType(); - return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]); + return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask); } @@ -3097,7 +3105,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { SDValue Op = N->getOperand(i); for (unsigned j = 0; j < NumElem; ++j) { SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - InElemTy, Op, DAG.getConstant(j, + InElemTy, Op, DAG.getConstant(j, dl, TLI.getVectorIdxTy())); Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); } @@ -3135,6 +3143,16 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0)); } +SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) { + SDLoc dl(N); + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + MVT InVT = V0.getValueType().getSimpleVT(); + MVT OutVT = MVT::getVectorVT(InVT.getVectorElementType(), + N->getValueType(0).getVectorNumElements()); + SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, V0, N->getOperand(1)); + return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext); +} + SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { SDLoc dl(N); unsigned NumElems = N->getNumOperands(); @@ -3153,7 +3171,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i=0; i<NumElem; ++i) { // Extract element from incoming vector SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, - Incoming, DAG.getConstant(i, TLI.getVectorIdxTy())); + Incoming, DAG.getConstant(i, dl, TLI.getVectorIdxTy())); SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); NewOps.push_back(Tr); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index ebf6b28..9c29769 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -259,6 +259,10 @@ bool DAGTypeLegalizer::run() { WidenVectorResult(N, i); Changed = true; goto NodeDone; + case TargetLowering::TypePromoteFloat: + PromoteFloatResult(N, i); + Changed = true; + goto NodeDone; } } @@ -308,6 +312,10 @@ ScanOperands: NeedsReanalyzing = WidenVectorOperand(N, i); Changed = true; break; + case TargetLowering::TypePromoteFloat: + NeedsReanalyzing = PromoteFloatOperand(N, i); + Changed = true; + break; } break; } @@ -753,6 +761,17 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { OpEntry = Result; } +void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) { + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for promoted float"); + AnalyzeNewValue(Result); + + SDValue 
&OpEntry = PromotedFloats[Op]; + assert(!OpEntry.getNode() && "Node is already promoted!"); + OpEntry = Result; +} + void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { // Note that in some cases vector operation operands may be greater than // the vector element type. For example BUILD_VECTOR of type <1 x i1> with @@ -978,9 +997,9 @@ void DAGTypeLegalizer::GetPairElements(SDValue Pair, SDLoc dl(Pair); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType()); Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, dl)); } SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, @@ -993,7 +1012,7 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, - DAG.getConstant(EltSize, Index.getValueType())); + DAG.getConstant(EltSize, dl, Index.getValueType())); return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr); } @@ -1010,7 +1029,8 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, - DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy())); + DAG.getConstant(LVT.getSizeInBits(), dlHi, + TLI.getPointerTy())); return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); } @@ -1096,7 +1116,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, Op.getValueType().getSizeInBits() && "Invalid integer splitting!"); Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, - DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy())); + DAG.getConstant(LoVT.getSizeInBits(), dl, + TLI.getPointerTy())); Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index cef3fc9..2f27789 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -93,6 +93,11 @@ private: /// the same size, this map indicates the converted value to use. SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats; + /// PromotedFloats - For floating point nodes that have a smaller precision + /// than the smallest supported precision, this map indicates what promoted + /// value to use. + SmallDenseMap<SDValue, SDValue, 8> PromotedFloats; + /// ExpandedFloats - For float nodes that need to be expanded this map /// indicates which operands are the expanded version of the input. 
SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats; @@ -273,6 +278,7 @@ private: SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); @@ -499,6 +505,44 @@ private: void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, SDLoc dl); + + //===--------------------------------------------------------------------===// + // Float promotion support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + SDValue GetPromotedFloat(SDValue Op) { + SDValue &PromotedOp = PromotedFloats[Op]; + RemapValue(PromotedOp); + assert(PromotedOp.getNode() && "Operand wasn't promoted?"); + return PromotedOp; + } + void SetPromotedFloat(SDValue Op, SDValue Result); + + void PromoteFloatResult(SDNode *N, unsigned ResNo); + SDValue PromoteFloatRes_BITCAST(SDNode *N); + SDValue PromoteFloatRes_BinOp(SDNode *N); + SDValue PromoteFloatRes_ConstantFP(SDNode *N); + SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N); + SDValue PromoteFloatRes_FMAD(SDNode *N); + SDValue PromoteFloatRes_FPOWI(SDNode *N); + SDValue PromoteFloatRes_FP_ROUND(SDNode *N); + SDValue PromoteFloatRes_LOAD(SDNode *N); + SDValue PromoteFloatRes_SELECT(SDNode *N); + SDValue PromoteFloatRes_SELECT_CC(SDNode *N); + SDValue PromoteFloatRes_UnaryOp(SDNode *N); + SDValue PromoteFloatRes_UNDEF(SDNode *N); + SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N); + + bool PromoteFloatOperand(SDNode *N, unsigned ResNo); + SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo); + //===--------------------------------------------------------------------===// // Scalarization Support: LegalizeVectorTypes.cpp //===--------------------------------------------------------------------===// @@ -582,6 +626,7 @@ private: void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -593,14 +638,16 @@ private: bool SplitVectorOperand(SDNode *N, unsigned OpNo); SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo); SDValue SplitVecOp_UnaryOp(SDNode *N); + SDValue SplitVecOp_TruncateHelper(SDNode *N); SDValue SplitVecOp_BITCAST(SDNode *N); SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); + SDValue 
SplitVecOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); - SDValue SplitVecOp_TRUNCATE(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 38829b6..330c31c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -50,6 +50,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: break; + case TargetLowering::TypePromoteFloat: + llvm_unreachable("Bitcast of a promotion-needing float should never need " + "expansion"); case TargetLowering::TypeSoftenFloat: // Convert the integer operand instead. SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); @@ -117,7 +120,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SmallVector<SDValue, 8> Vals; for (unsigned i = 0; i < NumElems; ++i) Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, - CastInOp, DAG.getConstant(i, + CastInOp, DAG.getConstant(i, dl, TLI.getVectorIdxTy()))); // Build Lo, Hi pair by pairing extracted elements if needed. @@ -170,7 +173,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, + DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); // Load the second half from the stack slot. @@ -235,7 +238,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, - DAG.getConstant(1, Idx.getValueType())); + DAG.getConstant(1, dl, Idx.getValueType())); Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); if (TLI.isBigEndian()) @@ -267,7 +270,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, isInvariant, @@ -436,7 +439,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, - DAG.getConstant(1, Idx.getValueType())); + DAG.getConstant(1, dl, Idx.getValueType())); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); // Convert the new vector to the old vector type.
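(ExpandOp_INSERT_VECTOR_ELT above turns one insert of an illegal wide element into two inserts of its halves at 2*Idx and 2*Idx+1. In array form, assuming little-endian half order; names are made up.)

#include <cstdint>

// INSERT_VECTOR_ELT of an i64 into v2i64, performed on the bitcast v4i32:
// write the Lo half at 2*Idx and the Hi half at 2*Idx + 1.
void insert_i64_as_two_i32(uint32_t Vec[4], unsigned Idx, uint64_t Elt) {
  Vec[2 * Idx]     = (uint32_t)Elt;         // Lo
  Vec[2 * Idx + 1] = (uint32_t)(Elt >> 32); // Hi, at Idx + 1
}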
@@ -485,7 +488,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { isVolatile, isNonTemporal, Alignment, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 3a8c276..c06227b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -196,6 +196,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0); + bool HasVectorValue = false; if (Op.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); ISD::LoadExtType ExtType = LD->getExtensionType(); @@ -207,6 +208,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return TranslateLegalizeResults(Op, Result); case TargetLowering::Custom: if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) { + if (Lowered == Result) + return TranslateLegalizeResults(Op, Lowered); Changed = true; if (Lowered->getNumValues() != Op->getNumValues()) { // This expanded to something other than the load. Assume the @@ -232,16 +235,18 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); - case TargetLowering::Custom: - Changed = true; - return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG)); + case TargetLowering::Custom: { + SDValue Lowered = TLI.LowerOperation(Result, DAG); + Changed = Lowered != Result; + return TranslateLegalizeResults(Op, Lowered); + } case TargetLowering::Expand: Changed = true; return LegalizeOp(ExpandStore(Op)); } - } + } else if (Op.getOpcode() == ISD::MSCATTER) + HasVectorValue = true; - bool HasVectorValue = false; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); J != E; ++J) @@ -317,6 +322,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: QueryType = Node->getValueType(0); break; case ISD::FP_ROUND_INREG: @@ -326,6 +335,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UINT_TO_FP: QueryType = Node->getOperand(0).getValueType(); break; + case ISD::MSCATTER: + QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType(); + break; } switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { @@ -375,8 +387,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) { // There are currently two cases of vector promotion: // 1) Bitcasting a vector of integers to a different type to a vector of the - // same overall length. For example, x86 promotes ISD::AND on v2i32 to v1i64. - // 2) Extending a vector of floats to a vector of the same number oflarger + // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. + // 2) Extending a vector of floats to a vector of the same number of larger // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. 
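(A scalar analogue of promotion case (2), as a sketch: doing the arithmetic once in the wider type and rounding back is safe for a single add because the wide format carries enough extra precision, the same property that lets AArch64 run v4f16 FADD as v4f32.)

// Compute in the promoted type, then the FP_ROUND step narrows back.
// For one addition this matches native narrow arithmetic.
float fadd_promoted(float A, float B) {
  double Wide = (double)A + (double)B; // op in the promoted type
  return (float)Wide;                  // FP_ROUND back to the result type
}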
MVT VT = Op.getSimpleValueType(); assert(Op.getNode()->getNumValues() == 1 && @@ -403,7 +415,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) { if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) - return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0)); + return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl)); else return DAG.getNode(ISD::BITCAST, dl, VT, Op); } @@ -512,7 +524,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment(), + LD->isInvariant(), + MinAlign(LD->getAlignment(), Offset), LD->getAAInfo()); } else { EVT LoadVT = WideVT; @@ -524,13 +537,15 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { LD->getPointerInfo().getWithOffset(Offset), LoadVT, LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), - LD->getAlignment(), LD->getAAInfo()); + MinAlign(LD->getAlignment(), Offset), + LD->getAAInfo()); } RemainingBytes -= LoadBytes; Offset += LoadBytes; BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(LoadBytes, BasePTR.getValueType())); + DAG.getConstant(LoadBytes, dl, + BasePTR.getValueType())); LoadVals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -538,7 +553,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { // Extract bits, pack and extend/trunc them into destination type. unsigned SrcEltBits = SrcEltVT.getSizeInBits(); - SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT); + SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT); unsigned BitOffset = 0; unsigned WideIdx = 0; @@ -548,7 +563,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { SDValue Lo, Hi, ShAmt; if (BitOffset < WideBits) { - ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT)); + ShAmt = DAG.getConstant(BitOffset, dl, TLI.getShiftAmountTy(WideVT)); Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); } @@ -558,7 +573,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { WideIdx++; BitOffset -= WideBits; if (BitOffset > 0) { - ShAmt = DAG.getConstant(SrcEltBits - BitOffset, + ShAmt = DAG.getConstant(SrcEltBits - BitOffset, dl, TLI.getShiftAmountTy(WideVT)); Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); @@ -577,7 +592,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); break; case ISD::SEXTLOAD: - ShAmt = DAG.getConstant(WideBits - SrcEltBits, + ShAmt = DAG.getConstant(WideBits - SrcEltBits, dl, TLI.getShiftAmountTy(WideVT)); Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); @@ -595,10 +610,10 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), SrcVT.getScalarType(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), - LD->getAlignment(), LD->getAAInfo()); + MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo()); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(Stride, BasePTR.getValueType())); + DAG.getConstant(Stride, dl, BasePTR.getValueType())); 
Vals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -649,15 +664,16 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { SmallVector<SDValue, 8> Stores; for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - RegSclVT, Value, DAG.getConstant(Idx, TLI.getVectorIdxTy())); + RegSclVT, Value, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment, AAInfo); + isVolatile, isNonTemporal, MinAlign(Alignment, Idx*Stride), + AAInfo); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(Stride, BasePTR.getValueType())); + DAG.getConstant(Stride, dl, BasePTR.getValueType())); Stores.push_back(Store); } @@ -727,8 +743,9 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { EVT BitTy = MaskTy.getScalarType(); Mask = DAG.getSelect(DL, BitTy, Mask, - DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy), - DAG.getConstant(0, BitTy)); + DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, + BitTy), + DAG.getConstant(0, DL, BitTy)); // Broadcast the mask so that the entire vector is all-one or all zero. SmallVector<SDValue, 8> Ops(NumElem, Mask); @@ -741,7 +758,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); SDValue AllOnes = DAG.getConstant( - APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy); + APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy); SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); @@ -763,7 +780,7 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { unsigned BW = VT.getScalarType().getSizeInBits(); unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); - SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT); + SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); Op = Op.getOperand(0); Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); @@ -810,7 +827,7 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { // without full scalarization than the sign extension does. unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits(); - SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, VT); + SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT); return DAG.getNode(ISD::SRA, DL, VT, DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), ShiftAmount); @@ -829,7 +846,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { // Build up a zero vector to blend into this one. 
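(The mask-and-blend lowering in ExpandSELECT/ExpandVSELECT above reduces a select to bitwise ops once the condition is widened to all-ones or all-zeros. Scalar sketch, illustrative name.)

#include <cstdint>

// select(Cond, A, B) as (A & Mask) | (B & ~Mask) with Mask all-ones when the
// condition is true; the XOR with AllOnes above is how ~Mask is formed.
uint32_t select_blend(bool Cond, uint32_t A, uint32_t B) {
  uint32_t Mask = Cond ? 0xFFFFFFFFu : 0u;
  return (A & Mask) | (B & ~Mask);
}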
EVT SrcScalarVT = SrcVT.getScalarType(); - SDValue ScalarZero = DAG.getTargetConstant(0, SrcScalarVT); + SDValue ScalarZero = DAG.getTargetConstant(0, DL, SrcScalarVT); SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero); SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands); @@ -910,7 +927,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); SDValue AllOnes = DAG.getConstant( - APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT); + APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), DL, VT); SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); @@ -933,16 +950,16 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); unsigned BW = SVT.getSizeInBits(); - SDValue HalfWord = DAG.getConstant(BW/2, VT); + SDValue HalfWord = DAG.getConstant(BW/2, DL, VT); // Constants to clear the upper part of the word. // Notice that we can also use SHL+SHR, but using a constant is slightly // faster on x86. uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF; - SDValue HalfWordMask = DAG.getConstant(HWMask, VT); + SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); // Two to the power of half-word-size. - SDValue TWOHW = DAG.getConstantFP((1<<(BW/2)), Op.getValueType()); + SDValue TWOHW = DAG.getConstantFP(1 << (BW/2), DL, Op.getValueType()); // Clear upper part of LO, lower HI SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); @@ -961,8 +978,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { - SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType()); - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + SDLoc DL(Op); + SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); + return DAG.getNode(ISD::FSUB, DL, Op.getValueType(), Zero, Op.getOperand(0)); } return DAG.UnrollVectorOp(Op.getNode()); @@ -978,16 +996,16 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, dl, TLI.getVectorIdxTy())); SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, dl, TLI.getVectorIdxTy())); Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT), LHSElem, RHSElem, CC); Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getConstant(APInt::getAllOnesValue - (EltVT.getSizeInBits()), EltVT), - DAG.getConstant(0, EltVT)); + (EltVT.getSizeInBits()), dl, EltVT), + DAG.getConstant(0, dl, EltVT)); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 63671f7..445e882 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -252,7 +252,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { } else { EVT VT = OpVT.getVectorElementType(); Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, - DAG.getConstant(0, TLI.getVectorIdxTy())); 
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy())); } return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); } @@ -308,7 +308,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); // Vector read from all ones, scalar expects a single 1 so mask. Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT, - Cond, DAG.getConstant(1, CondVT)); + Cond, DAG.getConstant(1, SDLoc(N), CondVT)); break; case TargetLowering::ZeroOrNegativeOneBooleanContent: assert(VecBool == TargetLowering::UndefinedBooleanContent || @@ -385,9 +385,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { } else { EVT VT = OpVT.getVectorElementType(); LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); } // Turn it into a scalar SETCC. @@ -600,6 +600,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::MLOAD: SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi); break; + case ISD::MGATHER: + SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi); + break; case ISD::SETCC: SplitVecRes_SETCC(N, Lo, Hi); break; @@ -668,6 +671,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::UREM: case ISD::SREM: case ISD::FREM: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: SplitVecRes_BinOp(N, Lo, Hi); break; case ISD::FMA: @@ -723,6 +730,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: + case TargetLowering::TypePromoteFloat: case TargetLowering::TypeSoftenFloat: case TargetLowering::TypeScalarizeVector: case TargetLowering::TypeWidenVector: @@ -810,7 +818,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, - DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), + DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl, TLI.getVectorIdxTy())); } @@ -844,7 +852,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, StackPtr.getValueType())); + DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), @@ -891,7 +899,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo.getValueType(), Lo, Elt, Idx); else Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, - DAG.getConstant(IdxVal - LoNumElts, + DAG.getConstant(IdxVal - LoNumElts, dl, TLI.getVectorIdxTy())); return; } @@ -923,7 +931,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Increment the pointer to the other part. 
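(The ExpandUINT_TO_FLOAT path above converts each half-word exactly and recombines with the HalfWord/HWMask/TWOHW constants. The scalar shape, as a sketch.)

#include <cstdint>

// Unsigned 64-bit to double via half-words: both halves convert exactly,
// and hi * 2^32 + lo reassembles the value with a single final rounding.
double uint64_to_double_by_halves(uint64_t X) {
  uint32_t Hi = (uint32_t)(X >> 32);
  uint32_t Lo = (uint32_t)(X & 0xFFFFFFFFu);
  return (double)Hi * 4294967296.0 + (double)Lo; // 4294967296 == 2^32
}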
unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, StackPtr.getValueType())); + DAG.getConstant(IncrementSize, dl, + StackPtr.getValueType())); // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), @@ -966,7 +975,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment, @@ -1021,7 +1030,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); MMO = DAG.getMachineFunction(). getMachineMemOperand(MLD->getPointerInfo(), @@ -1043,6 +1052,54 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, } +void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, + SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(MGT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0)); + + SDValue Ch = MGT->getChain(); + SDValue Ptr = MGT->getBasePtr(); + SDValue Mask = MGT->getMask(); + unsigned Alignment = MGT->getOriginalAlignment(); + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + + EVT MemoryVT = MGT->getMemoryVT(); + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl); + + SDValue IndexHi, IndexLo; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(MGT->getIndex(), dl); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), MGT->getRanges()); + + SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; + Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, + MMO); + + SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; + Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, + MMO); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(MGT, 1), Ch); +} + + void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && @@ -1236,8 +1293,9 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, // Extract the vector element by hand. SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Inputs[Input], DAG.getConstant(Idx, - TLI.getVectorIdxTy()))); + Inputs[Input], + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy()))); } // Construct the Lo/Hi output using a BUILD_VECTOR. 
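(The split loads above address the high half at the base pointer plus the low half's store size, IncrementSize = LoMemVT bits / 8. In plain C++ for a v8i32 split into two v4i32 halves; purely illustrative.)

#include <cstdint>
#include <cstring>

// Hi half loads from the base pointer advanced by the low half's byte size.
void load_v8i32_halves(const uint32_t *Ptr, uint32_t Lo[4], uint32_t Hi[4]) {
  std::memcpy(Lo, Ptr, 16);     // low half at the base pointer
  std::memcpy(Hi, Ptr + 4, 16); // high half, 16 bytes further on
}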
@@ -1293,7 +1351,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; - case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break; + case ISD::TRUNCATE: + Res = SplitVecOp_TruncateHelper(N); + break; case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); @@ -1301,21 +1361,37 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::MSTORE: Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo); break; + case ISD::MSCATTER: + Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo); + break; + case ISD::MGATHER: + Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo); + break; case ISD::VSELECT: Res = SplitVecOp_VSELECT(N, OpNo); break; - case ISD::CTTZ: - case ISD::CTLZ: - case ISD::CTPOP: - case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + Res = SplitVecOp_TruncateHelper(N); + else + Res = SplitVecOp_UnaryOp(N); + break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - case ISD::FTRUNC: + if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + Res = SplitVecOp_TruncateHelper(N); + else + Res = SplitVecOp_UnaryOp(N); + break; + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FP_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: + case ISD::FTRUNC: Res = SplitVecOp_UnaryOp(N); break; } @@ -1420,7 +1496,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); } else { return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, - DAG.getConstant(IdxVal - LoElts, Idx.getValueType())); + DAG.getConstant(IdxVal - LoElts, dl, + Idx.getValueType())); } } @@ -1441,7 +1518,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { if (IdxVal < LoElts) return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); return SDValue(DAG.UpdateNodeOperands(N, Hi, - DAG.getConstant(IdxVal - LoElts, + DAG.getConstant(IdxVal - LoElts, SDLoc(N), Idx.getValueType())), 0); } @@ -1462,6 +1539,68 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { MachinePointerInfo(), EltVT, false, false, false, 0); } +SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, + unsigned OpNo) { + EVT LoVT, HiVT; + SDLoc dl(MGT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0)); + + SDValue Ch = MGT->getChain(); + SDValue Ptr = MGT->getBasePtr(); + SDValue Index = MGT->getIndex(); + SDValue Mask = MGT->getMask(); + unsigned Alignment = MGT->getOriginalAlignment(); + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + + EVT MemoryVT = MGT->getMemoryVT(); + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl); + + SDValue IndexHi, IndexLo; + if (Index.getNode()) + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); + else + IndexLo = IndexHi = Index; + + MachineMemOperand *MMO = DAG.getMachineFunction(). 
+ getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), MGT->getRanges()); + + SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; + SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, + OpsLo, MMO); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), + MGT->getRanges()); + + SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; + SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, + OpsHi, MMO); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(MGT, 1), Ch); + + SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo, + Hi); + ReplaceValueWith(SDValue(MGT, 0), Res); + return SDValue(); +} + SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo) { SDValue Ch = N->getChain(); @@ -1497,7 +1636,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), @@ -1507,11 +1646,64 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, N->isTruncatingStore()); - // Build a factor node to remember that this store is independent of the // other one. return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} +SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, + unsigned OpNo) { + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + SDValue Mask = N->getMask(); + SDValue Index = N->getIndex(); + SDValue Data = N->getValue(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); + SDLoc DL(N); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + GetSplitVector(Data, DataLo, DataHi); + SDValue MaskLo, MaskHi; + GetSplitVector(Mask, MaskLo, MaskHi); + + SDValue PtrLo, PtrHi; + if (Ptr.getValueType().isVector()) // the gather/scatter form takes a vector of pointers + std::tie(PtrLo, PtrHi) = DAG.SplitVector(Ptr, DL); + else + PtrLo = PtrHi = Ptr; + + SDValue IndexHi, IndexLo; + if (Index.getNode()) + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); + else + IndexLo = IndexHi = Index; + + SDValue Lo, Hi; + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + SDValue OpsLo[] = {Ch, DataLo, MaskLo, PtrLo, IndexLo}; + Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), + DL, OpsLo, MMO); + + MMO = DAG.getMachineFunction().
+ getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + SDValue OpsHi[] = {Ch, DataHi, MaskHi, PtrHi, IndexHi}; + Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), + DL, OpsHi, MMO); + + // Build a factor node to remember that this store is independent of the + // other one. + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1544,7 +1736,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, @@ -1573,7 +1765,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); i != e; ++i) { Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, - Op, DAG.getConstant(i, TLI.getVectorIdxTy()))); + Op, DAG.getConstant(i, DL, TLI.getVectorIdxTy()))); } } @@ -1581,7 +1773,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts); } -SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { +SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { // The result type is legal, but the input type is illegal. If splitting // ends up with the result type of each half still being legal, just // do that. If, however, that would result in an illegal result type, @@ -1603,6 +1795,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { EVT InVT = InVec->getValueType(0); EVT OutVT = N->getValueType(0); unsigned NumElements = OutVT.getVectorNumElements(); + bool IsFloat = OutVT.isFloatingPoint(); + // Widening should have already made sure this is a power-two vector // if we're trying to split it at all. assert() that's true, just in case. assert(!(NumElements & 1) && "Splitting vector, but not in half!"); @@ -1621,11 +1815,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { SDValue InLoVec, InHiVec; std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); // Truncate them to 1/2 the element size. - EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); + EVT HalfElementVT = IsFloat ? + EVT::getFloatingPointVT(InElementSize/2) : + EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements/2); - SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec); - SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec); + SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec); + SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec); // Concatenate them to get the full intermediate truncation result. EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, @@ -1634,7 +1830,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { // type. This should normally be something that ends up being legal directly, // but in theory if a target has very wide vectors and an annoyingly // restricted set of legal types, this split can chain to build things up. 
- return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); + return IsFloat ? + DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec, + DAG.getTargetConstant(0, DL, TLI.getPointerTy())) : + DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); } SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { @@ -1865,9 +2064,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { while (CurNumElts != 0) { while (CurNumElts >= NumElts) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy())); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy())); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); Idx += NumElts; CurNumElts -= NumElts; @@ -1880,11 +2081,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { if (NumElts == 1) { for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp1, DAG.getConstant(Idx, - TLI.getVectorIdxTy())); + InOp1, + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy())); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp2, DAG.getConstant(Idx, - TLI.getVectorIdxTy())); + InOp2, + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy())); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2); } @@ -1922,8 +2125,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { unsigned NumToInsert = ConcatEnd - Idx - 1; for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, - ConcatOps[OpIdx], DAG.getConstant(i, - TLI.getVectorIdxTy())); + ConcatOps[OpIdx], + DAG.getConstant(i, dl, TLI.getVectorIdxTy())); } ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; @@ -2009,9 +2212,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { } if (InVTNumElts % WidenNumElts == 0) { - SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, - InOp, DAG.getConstant(0, - TLI.getVectorIdxTy())); + SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); // Extract the input and convert the shorten input vector. 
if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); @@ -2026,7 +2228,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned i; for (i=0; i < MinElts; ++i) { SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, DL, TLI.getVectorIdxTy())); if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else @@ -2114,6 +2316,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); break; case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypePromoteFloat: case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: case TargetLowering::TypeScalarizeVector: @@ -2252,7 +2455,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, TLI.getVectorIdxTy())); + DAG.getConstant(j, dl, TLI.getVectorIdxTy())); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) @@ -2310,7 +2513,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { if (InVTNumElts % WidenNumElts == 0) { // Extract the input and convert the shorten input vector. InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2326,7 +2529,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { unsigned i; for (i=0; i < MinElts; ++i) { SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, dl, TLI.getVectorIdxTy())); Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2369,7 +2572,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { unsigned i; for (i=0; i < NumElts; ++i) Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(IdxVal+i, TLI.getVectorIdxTy())); + DAG.getConstant(IdxVal + i, dl, TLI.getVectorIdxTy())); SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) @@ -2432,7 +2635,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { unsigned NumConcat = WidenNumElts / MaskNumElts; SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, MaskVT); + SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); Ops[0] = Mask; for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = ZeroVal; @@ -2553,6 +2756,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { assert(InVT.isVector() && "can not widen non-vector type"); EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); + + // The input and output types often differ here, and it could be that while + // we'd prefer to widen the result type, the input operands have been split. + // In this case, we also need to split the result of this node as well. 
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + SDValue SplitVSetCC = SplitVecOp_VSETCC(N); + SDValue Res = ModifyToType(SplitVSetCC, WidenVT); + return Res; + } + InOp1 = GetWidenedVector(InOp1); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); @@ -2662,10 +2875,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); else InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); break; } } @@ -2710,7 +2923,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { for (unsigned i=0; i < NumElts; ++i) Ops[i] = DAG.getNode(Opcode, dl, EltVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, TLI.getVectorIdxTy()))); + DAG.getConstant(i, dl, + TLI.getVectorIdxTy()))); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } @@ -2731,7 +2945,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); } } @@ -2759,7 +2973,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, TLI.getVectorIdxTy())); + DAG.getConstant(j, dl, TLI.getVectorIdxTy())); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } @@ -2814,7 +3028,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { unsigned NumConcat = WidenNumElts / MaskNumElts; SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, MaskVT); + SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); Ops[0] = Mask; for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = ZeroVal; @@ -2849,8 +3063,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SVT.getVectorElementType(), N->getValueType(0).getVectorNumElements()); SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, - ResVT, WideSETCC, DAG.getConstant(0, - TLI.getVectorIdxTy())); + ResVT, WideSETCC, + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); return PromoteTargetBoolean(CC, N->getValueType(0)); } @@ -2888,7 +3102,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (MemVT.getSizeInBits() <= WidenEltWidth) break; - if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); + if ((Action == TargetLowering::TypeLegal || + Action == TargetLowering::TypePromoteInteger) && + (WidenWidth % MemVTWidth) == 0 && isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { @@ -2944,7 +3161,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, LdTy = NewLdTy; } VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], - DAG.getConstant(Idx++, TLI.getVectorIdxTy())); + DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy())); } return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } @@ -3015,7 +3232,7 @@ SDValue 
DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, unsigned Increment = NewVTWidth / 8; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, BasePtr.getValueType())); + DAG.getConstant(Increment, dl, BasePtr.getValueType())); SDValue L; if (LdWidth < NewVTWidth) { @@ -3141,7 +3358,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Offset, + DAG.getConstant(Offset, dl, BasePtr.getValueType())); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, @@ -3192,7 +3409,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, unsigned NumVTElts = NewVT.getVectorNumElements(); do { SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy())); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, @@ -3201,7 +3419,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, Offset += Increment; Idx += NumVTElts; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, BasePtr.getValueType())); + DAG.getConstant(Increment, dl, + BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); } else { // Cast the vector to the scalar type we can store @@ -3212,7 +3431,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, Idx = Idx * ValEltWidth / NewVTWidth; do { SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, - DAG.getConstant(Idx++, TLI.getVectorIdxTy())); + DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy())); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, @@ -3220,7 +3439,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, BasePtr.getValueType())); + DAG.getConstant(Increment, dl, + BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); // Restore index back to be relative to the original widen element type Idx = Idx * NewVTWidth / ValEltWidth; @@ -3258,7 +3478,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, unsigned Increment = ValEltVT.getSizeInBits() / 8; unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, isVolatile, isNonTemporal, Align, @@ -3266,10 +3486,11 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getConstant(Offset, - BasePtr.getValueType())); + BasePtr, + DAG.getConstant(Offset, dl, + BasePtr.getValueType())); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, 
ValOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), StEltVT, isVolatile, isNonTemporal, @@ -3306,7 +3527,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); // Fall back to extract and build. SmallVector<SDValue, 16> Ops(WidenNumElts); @@ -3315,7 +3536,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { unsigned Idx; for (Idx = 0; Idx < MinNumElts; ++Idx) Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); SDValue UndefVal = DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index db38b76..6303422 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -47,7 +47,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) TRI = STI.getRegisterInfo(); TLI = IS->TLI; TII = STI.getInstrInfo(); - ResourcesModel = TII->CreateTargetScheduleState(STI); + ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); // This hard requirement could be relaxed, but for now // do not let it procede. assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); @@ -637,17 +637,3 @@ void ResourcePriorityQueue::remove(SUnit *SU) { Queue.pop_back(); } - - -#ifdef NDEBUG -void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {} -#else -void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const { - ResourcePriorityQueue q = *this; - while (!q.empty()) { - SUnit *su = q.pop(); - dbgs() << "Height " << su->getHeight() << ": "; - su->dump(DAG); - } -} -#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index bce69d7..c27f8de 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -35,7 +35,6 @@ public: FRAMEIX = 2 // value is contents of a stack location }; private: - enum DbgValueKind kind; union { struct { SDNode *Node; // valid for expressions @@ -46,17 +45,18 @@ private: } u; MDNode *Var; MDNode *Expr; - bool IsIndirect; uint64_t Offset; DebugLoc DL; unsigned Order; - bool Invalid; + enum DbgValueKind kind; + bool IsIndirect; + bool Invalid = false; + public: // Constructor for non-constants. SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir, uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), IsIndirect(indir), Offset(off), DL(dl), Order(O), - Invalid(false) { + : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(indir) { kind = SDNODE; u.s.Node = N; u.s.ResNo = R; @@ -65,8 +65,7 @@ public: // Constructor for constants. 
SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O), - Invalid(false) { + : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) { kind = CONST; u.Const = C; } @@ -74,8 +73,7 @@ public: // Constructor for frame indices. SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O), - Invalid(false) { + : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) { kind = FRAMEIX; u.FrameIx = FI; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 5222de1..fd0fa31 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -173,7 +173,7 @@ public: HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } - ~ScheduleDAGRRList() { + ~ScheduleDAGRRList() override { delete HazardRec; delete AvailableQueue; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index f2b18fc..3853ada 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -137,13 +137,9 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, } // Helper for AddGlue to clone node operands. -static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, - SmallVectorImpl<EVT> &VTs, +static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs, SDValue ExtraOper = SDValue()) { - SmallVector<SDValue, 8> Ops; - for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) - Ops.push_back(N->getOperand(I)); - + SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end()); if (ExtraOper.getNode()) Ops.push_back(ExtraOper); @@ -165,7 +161,6 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, } static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { - SmallVector<EVT, 4> VTs; SDNode *GlueDestNode = Glue.getNode(); // Don't add glue from a node to itself. @@ -179,9 +174,7 @@ static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { // Don't add glue to something that already has a glue value. if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false; - for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) - VTs.push_back(N->getValueType(I)); - + SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end()); if (AddGlue) VTs.push_back(MVT::Glue); @@ -197,11 +190,8 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { !N->hasAnyUseOfValue(N->getNumValues() - 1)) && "expected an unused glue value"); - SmallVector<EVT, 4> VTs; - for (unsigned I = 0, E = N->getNumValues()-1; I != E; ++I) - VTs.push_back(N->getValueType(I)); - - CloneNodeWithValues(N, DAG, VTs); + CloneNodeWithValues(N, DAG, + makeArrayRef(N->value_begin(), N->getNumValues() - 1)); } /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them. 
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 2cd1f4b..6351fa2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -44,7 +44,7 @@ namespace llvm { explicit ScheduleDAGSDNodes(MachineFunction &mf); - virtual ~ScheduleDAGSDNodes() {} + ~ScheduleDAGSDNodes() override {} /// Run - perform scheduling. /// diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 418b58e..eee4a4b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -76,7 +76,7 @@ public: HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } - ~ScheduleDAGVLIW() { + ~ScheduleDAGVLIW() override { delete HazardRec; delete AvailableQueue; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f75d5f4..efd4bd9 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -49,6 +49,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cmath> +#include <utility> using namespace llvm; @@ -196,6 +197,22 @@ bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { return true; } +/// \brief Return true if the specified node is a BUILD_VECTOR node of +/// all ConstantFPSDNode or undef. +bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) + continue; + if (!isa<ConstantFPSDNode>(Op)) + return false; + } + return true; +} + /// isScalarToVector - Return true if the specified node is a /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low /// element is not an undef. 
@@ -499,8 +516,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::SUB: case ISD::SHL: { const BinaryWithFlagsSDNode *BinNode = cast<BinaryWithFlagsSDNode>(N); - AddBinaryNodeIDCustom(ID, N->getOpcode(), BinNode->hasNoUnsignedWrap(), - BinNode->hasNoSignedWrap(), BinNode->isExact()); + AddBinaryNodeIDCustom( + ID, N->getOpcode(), BinNode->Flags.hasNoUnsignedWrap(), + BinNode->Flags.hasNoSignedWrap(), BinNode->Flags.hasExact()); break; } case ISD::ATOMIC_CMP_SWAP: @@ -860,7 +878,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); - SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); return Node; } @@ -878,7 +896,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); - SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); return Node; } @@ -895,7 +913,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); - SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); return Node; } @@ -947,9 +965,9 @@ BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, if (isBinOpWithFlags(Opcode)) { BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode( Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); - FN->setHasNoUnsignedWrap(nuw); - FN->setHasNoSignedWrap(nsw); - FN->setIsExact(exact); + FN->Flags.setNoUnsignedWrap(nuw); + FN->Flags.setNoSignedWrap(nsw); + FN->Flags.setExact(exact); return FN; } @@ -959,6 +977,40 @@ BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, return N; } +SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, + void *&InsertPos) { + SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (N) { + switch (N->getOpcode()) { + default: break; + case ISD::Constant: + case ISD::ConstantFP: + llvm_unreachable("Querying for Constant and ConstantFP nodes requires " + "debug location. Use another overload."); + } + } + return N; +} + +SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, + DebugLoc DL, void *&InsertPos) { + SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (N) { + switch (N->getOpcode()) { + default: break; // Process only regular (non-target) constant nodes. + case ISD::Constant: + case ISD::ConstantFP: + // Erase debug location from the node if the node is used at several + // different places so as not to propagate one location to all uses, as + // that leads to incorrect debug info.
+ if (N->getDebugLoc() != DL) + N->setDebugLoc(DebugLoc()); + break; + } + } + return N; +} + void SelectionDAG::clear() { allnodes_clear(); OperandAllocator.Reset(); @@ -1014,7 +1066,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { APInt Imm = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); return getNode(ISD::AND, DL, Op.getValueType(), Op, - getConstant(Imm, Op.getValueType())); + getConstant(Imm, DL, Op.getValueType())); } SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { @@ -1052,7 +1104,7 @@ SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue NegOne = - getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT); return getNode(ISD::XOR, DL, VT, Val, NegOne); } @@ -1062,31 +1114,33 @@ SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) { switch (TLI->getBooleanContents(VT)) { case TargetLowering::ZeroOrOneBooleanContent: case TargetLowering::UndefinedBooleanContent: - TrueValue = getConstant(1, VT); + TrueValue = getConstant(1, DL, VT); break; case TargetLowering::ZeroOrNegativeOneBooleanContent: - TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), + TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT); break; } return getNode(ISD::XOR, DL, VT, Val, TrueValue); } -SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT, bool isO) { +SDValue SelectionDAG::getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isT, + bool isO) { EVT EltVT = VT.getScalarType(); assert((EltVT.getSizeInBits() >= 64 || (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && "getConstant with a uint64_t value that doesn't fit in the type!"); - return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT, isO); + return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO); } -SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT, bool isO) +SDValue SelectionDAG::getConstant(const APInt &Val, SDLoc DL, EVT VT, bool isT, + bool isO) { - return getConstant(*ConstantInt::get(*Context, Val), VT, isT, isO); + return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO); } -SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, - bool isO) { +SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, + bool isT, bool isO) { assert(VT.isInteger() && "Cannot create FP integer constant!"); EVT EltVT = VT.getScalarType(); @@ -1125,7 +1179,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, SmallVector<SDValue, 2> EltParts; for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) - .trunc(ViaEltSizeInBits), + .trunc(ViaEltSizeInBits), DL, ViaEltVT, isT, isO)); } @@ -1160,12 +1214,13 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, ID.AddBoolean(isO); void *IP = nullptr; SDNode *N = nullptr; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))) if (!VT.isVector()) return SDValue(N, 0); if (!N) { - N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT); + N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, DL.getDebugLoc(), + EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); } @@ -1179,16 +1234,17 @@ SDValue 
SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, return Result; } -SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { - return getConstant(Val, TLI->getPointerTy(), isTarget); +SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget) { + return getConstant(Val, DL, TLI->getPointerTy(), isTarget); } - -SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) { - return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget); +SDValue SelectionDAG::getConstantFP(const APFloat& V, SDLoc DL, EVT VT, + bool isTarget) { + return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget); } -SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ +SDValue SelectionDAG::getConstantFP(const ConstantFP& V, SDLoc DL, EVT VT, + bool isTarget){ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); EVT EltVT = VT.getScalarType(); @@ -1202,12 +1258,13 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ ID.AddPointer(&V); void *IP = nullptr; SDNode *N = nullptr; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))) if (!VT.isVector()) return SDValue(N, 0); if (!N) { - N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT); + N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, DL.getDebugLoc(), + EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); } @@ -1216,25 +1273,25 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ if (VT.isVector()) { SmallVector<SDValue, 8> Ops; Ops.assign(VT.getVectorNumElements(), Result); - // FIXME SDLoc info might be appropriate here Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops); } return Result; } -SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { +SDValue SelectionDAG::getConstantFP(double Val, SDLoc DL, EVT VT, + bool isTarget) { EVT EltVT = VT.getScalarType(); if (EltVT==MVT::f32) - return getConstantFP(APFloat((float)Val), VT, isTarget); + return getConstantFP(APFloat((float)Val), DL, VT, isTarget); else if (EltVT==MVT::f64) - return getConstantFP(APFloat(Val), VT, isTarget); + return getConstantFP(APFloat(Val), DL, VT, isTarget); else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 || EltVT==MVT::f16) { bool ignored; APFloat apf = APFloat(Val); apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, &ignored); - return getConstantFP(apf, VT, isTarget); + return getConstantFP(apf, DL, VT, isTarget); } else llvm_unreachable("Unsupported type in getConstantFP"); } @@ -1264,7 +1321,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, ID.AddInteger(TargetFlags); ID.AddInteger(GV->getType()->getAddressSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL.getIROrder(), @@ -1281,7 +1338,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { AddNodeIDNode(ID, Opc, getVTList(VT), None); ID.AddInteger(FI); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget); @@ -1300,7 +1357,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, ID.AddInteger(JTI); ID.AddInteger(TargetFlags); 
void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget, @@ -1326,7 +1383,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, ID.AddPointer(C); ID.AddInteger(TargetFlags); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, @@ -1353,7 +1410,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, C->addSelectionDAGCSEId(ID); ID.AddInteger(TargetFlags); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, @@ -1371,7 +1428,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, ID.AddInteger(Offset); ID.AddInteger(TargetFlags); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, @@ -1386,7 +1443,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None); ID.AddPointer(MBB); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB); @@ -1446,13 +1503,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { // N2 to point at N1. static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { std::swap(N1, N2); - int NElts = M.size(); - for (int i = 0; i != NElts; ++i) { - if (M[i] >= NElts) - M[i] -= NElts; - else if (M[i] >= 0) - M[i] += NElts; - } + ShuffleVectorSDNode::commuteMask(M); } SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, @@ -1484,6 +1535,34 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, if (N1.getOpcode() == ISD::UNDEF) commuteShuffle(N1, N2, MaskVec); + // If shuffling a splat, try to blend the splat instead. We do this here so + // that even when this arises during lowering we don't have to re-handle it. + auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { + BitVector UndefElements; + SDValue Splat = BV->getSplatValue(&UndefElements); + if (!Splat) + return; + + for (int i = 0; i < (int)NElts; ++i) { + if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + (int)NElts)) + continue; + + // If this input comes from undef, mark it as such. + if (UndefElements[MaskVec[i] - Offset]) { + MaskVec[i] = -1; + continue; + } + + // If we can blend a non-undef lane, use that instead. + if (!UndefElements[i]) + MaskVec[i] = i + Offset; + } + }; + if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) + BlendSplat(N1BV, 0); + if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) + BlendSplat(N2BV, NElts); + // Canonicalize all index into lhs, -> shuffle lhs, undef // Canonicalize all index into rhs, -> shuffle rhs, undef bool AllLHS = true, AllRHS = true; @@ -1513,9 +1592,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, return getUNDEF(VT); // If Identity shuffle return that node. 
- bool Identity = true; + bool Identity = true, AllSame = true; for (unsigned i = 0; i != NElts; ++i) { if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; + if (MaskVec[i] != MaskVec[0]) AllSame = false; } if (Identity && NElts) return N1; @@ -1537,18 +1617,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, if (Splat && Splat.getOpcode() == ISD::UNDEF) return getUNDEF(VT); + bool SameNumElts = + V.getValueType().getVectorNumElements() == VT.getVectorNumElements(); + // We only have a splat which can skip shuffles if there is a splatted // value and no undef lanes rearranged by the shuffle. if (Splat && UndefElements.none()) { // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the // number of elements match or the value splatted is a zero constant. - if (V.getValueType().getVectorNumElements() == - VT.getVectorNumElements()) + if (SameNumElts) return N1; if (auto *C = dyn_cast<ConstantSDNode>(Splat)) if (C->isNullValue()) return N1; } + + // If the shuffle itself creates a splat, build the vector directly. + if (AllSame && SameNumElts) { + const SDValue &Splatted = BV->getOperand(MaskVec[0]); + SmallVector<SDValue, 8> Ops(NElts, Splatted); + + EVT BuildVT = BV->getValueType(0); + SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl, BuildVT, Ops); + + // We may have jumped through bitcasts, so the type of the + // BUILD_VECTOR may not match the type of the shuffle. + if (BuildVT != VT) + NewBV = getNode(ISD::BITCAST, dl, VT, NewBV); + return NewBV; + } } } @@ -1559,7 +1656,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, ID.AddInteger(MaskVec[i]); void* IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) return SDValue(E, 0); // Allocate the mask array for the node out of the BumpPtrAllocator, since @@ -1579,19 +1676,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { MVT VT = SV.getSimpleValueType(0); - unsigned NumElems = VT.getVectorNumElements(); - SmallVector<int, 8> MaskVec; - - for (unsigned i = 0; i != NumElems; ++i) { - int Idx = SV.getMaskElt(i); - if (Idx >= 0) { - if (Idx < (int)NumElems) - Idx += NumElems; - else - Idx -= NumElems; - } - MaskVec.push_back(Idx); - } + SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end()); + ShuffleVectorSDNode::commuteMask(MaskVec); SDValue Op0 = SV.getOperand(0); SDValue Op1 = SV.getOperand(1); @@ -1612,7 +1698,7 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops); void* IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) return SDValue(E, 0); CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), @@ -1628,7 +1714,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { AddNodeIDNode(ID, ISD::Register, getVTList(VT), None); ID.AddInteger(RegNo); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT); @@ -1642,7 +1728,7 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None); ID.AddPointer(RegMask); void *IP = nullptr; - if (SDNode *E = 
CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); @@ -1657,7 +1743,7 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops); ID.AddPointer(Label); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), @@ -1680,7 +1766,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, ID.AddInteger(Offset); ID.AddInteger(TargetFlags); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset, @@ -1699,7 +1785,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { ID.AddPointer(V); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) SrcValueSDNode(V); @@ -1715,7 +1801,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { ID.AddPointer(MD); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); @@ -1734,7 +1820,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, ID.AddInteger(DestAS); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(), @@ -1791,13 +1877,14 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, switch (Cond) { default: break; case ISD::SETFALSE: - case ISD::SETFALSE2: return getConstant(0, VT); + case ISD::SETFALSE2: return getConstant(0, dl, VT); case ISD::SETTRUE: case ISD::SETTRUE2: { TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(N1->getValueType(0)); return getConstant( - Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? 
-1ULL : 1, dl, + VT); } case ISD::SETOEQ: @@ -1821,16 +1908,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, switch (Cond) { default: llvm_unreachable("Unknown integer setcc!"); - case ISD::SETEQ: return getConstant(C1 == C2, VT); - case ISD::SETNE: return getConstant(C1 != C2, VT); - case ISD::SETULT: return getConstant(C1.ult(C2), VT); - case ISD::SETUGT: return getConstant(C1.ugt(C2), VT); - case ISD::SETULE: return getConstant(C1.ule(C2), VT); - case ISD::SETUGE: return getConstant(C1.uge(C2), VT); - case ISD::SETLT: return getConstant(C1.slt(C2), VT); - case ISD::SETGT: return getConstant(C1.sgt(C2), VT); - case ISD::SETLE: return getConstant(C1.sle(C2), VT); - case ISD::SETGE: return getConstant(C1.sge(C2), VT); + case ISD::SETEQ: return getConstant(C1 == C2, dl, VT); + case ISD::SETNE: return getConstant(C1 != C2, dl, VT); + case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT); + case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT); + case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT); + case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT); + case ISD::SETLT: return getConstant(C1.slt(C2), dl, VT); + case ISD::SETGT: return getConstant(C1.sgt(C2), dl, VT); + case ISD::SETLE: return getConstant(C1.sle(C2), dl, VT); + case ISD::SETGE: return getConstant(C1.sge(C2), dl, VT); } } } @@ -1842,41 +1929,41 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, case ISD::SETEQ: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through - case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT); + case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT); case ISD::SETNE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpLessThan, VT); + R==APFloat::cmpLessThan, dl, VT); case ISD::SETLT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through - case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT); + case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT); case ISD::SETGT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through - case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT); + case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT); case ISD::SETLE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || - R==APFloat::cmpEqual, VT); + R==APFloat::cmpEqual, dl, VT); case ISD::SETGE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpEqual, VT); - case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT); - case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT); + R==APFloat::cmpEqual, dl, VT); + case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT); + case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, dl, VT); case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpEqual, VT); - case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT); + R==APFloat::cmpEqual, dl, VT); + case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT); case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpLessThan, VT); + R==APFloat::cmpLessThan, dl, VT); case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpUnordered, VT); - case ISD::SETULE: return 
getConstant(R!=APFloat::cmpGreaterThan, VT); - case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT); + R==APFloat::cmpUnordered, dl, VT); + case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT); + case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT); } } else { // Ensure that the constant occurs on the RHS. @@ -2323,6 +2410,21 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); break; } + case ISD::EXTRACT_ELEMENT: { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + const unsigned Index = + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + const unsigned BitWidth = Op.getValueType().getSizeInBits(); + + // Remove low part of known bits mask + KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth); + KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth); + + // Remove high part of known bit mask + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); + break; + } case ISD::FrameIndex: case ISD::TargetFrameIndex: if (unsigned Align = InferPtrAlignment(Op)) { @@ -2522,6 +2624,21 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // FIXME: it's tricky to do anything useful for this, but it is an important // case for targets like X86. break; + case ISD::EXTRACT_ELEMENT: { + const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1); + const int BitWidth = Op.getValueType().getSizeInBits(); + const int Items = + Op.getOperand(0).getValueType().getSizeInBits() / BitWidth; + + // Get reverse index (starting from 1), Op1 value indexes elements from + // little end. Sign starts at big end. + const int rIndex = Items - 1 - + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + + // If the sign portion ends in our element the subtraction gives correct + // result. Otherwise it gives either a negative or a > bitwidth result. + return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); + } } // If we are looking at the loaded value of the SDNode.
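
The EXTRACT_ELEMENT case added to ComputeNumSignBits above boils down to a single clamp: count how far the value's run of sign bits reaches down into the extracted element. A standalone sketch of that arithmetic with a worked check (the function and the driver are illustrative, not part of the patch):

#include <algorithm>
#include <cassert>

// Given KnownSign sign bits in a value made of Items elements of BitWidth
// bits each, element Index (counted from the little end) keeps whatever
// portion of the sign run reaches into it, clamped to [0, BitWidth] as in
// the patch.
static int signBitsOfElement(int KnownSign, int BitWidth, int Items,
                             int Index) {
  // Reverse the index so it counts elements down from the big (sign) end.
  int rIndex = Items - 1 - Index;
  return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}

int main() {
  // A 128-bit value with 70 known sign bits, split into two 64-bit halves:
  // the high half is entirely sign material, while the low half only sees
  // the 6 sign bits that spill past the high half.
  assert(signBitsOfElement(70, 64, 2, /*Index=*/1) == 64);
  assert(signBitsOfElement(70, 64, 2, /*Index=*/0) == 6);
  return 0;
}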
@@ -2643,7 +2760,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), None); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), @@ -2666,12 +2783,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, switch (Opcode) { default: break; case ISD::SIGN_EXTEND: - return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT, + return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::TRUNCATE: - return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT, + return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { @@ -2680,29 +2797,29 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, APFloat::rmNearestTiesToEven); - return getConstantFP(apf, VT); + return getConstantFP(apf, DL, VT); } case ISD::BITCAST: if (VT == MVT::f16 && C->getValueType(0) == MVT::i16) - return getConstantFP(APFloat(APFloat::IEEEhalf, Val), VT); + return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) - return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT); + return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) - return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT); + return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT); break; case ISD::BSWAP: - return getConstant(Val.byteSwap(), VT, C->isTargetOpcode(), + return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::CTPOP: - return getConstant(Val.countPopulation(), VT, C->isTargetOpcode(), + return getConstant(Val.countPopulation(), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: - return getConstant(Val.countLeadingZeros(), VT, C->isTargetOpcode(), + return getConstant(Val.countLeadingZeros(), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - return getConstant(Val.countTrailingZeros(), VT, C->isTargetOpcode(), + return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(), C->isOpaque()); } } @@ -2713,26 +2830,26 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, switch (Opcode) { case ISD::FNEG: V.changeSign(); - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); case ISD::FABS: V.clearSign(); - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); case ISD::FCEIL: { APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); break; } case ISD::FTRUNC: { APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); break; } case ISD::FFLOOR: { APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); break; } case ISD::FP_EXTEND: { @@ -2741,7 +2858,7 @@ SDValue 
SelectionDAG::getNode(unsigned Opcode, SDLoc DL, // FIXME need to be more flexible about rounding mode. (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); } case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: { @@ -2755,20 +2872,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual break; APInt api(VT.getSizeInBits(), x); - return getConstant(api, VT); + return getConstant(api, DL, VT); } case ISD::BITCAST: if (VT == MVT::i16 && C->getValueType(0) == MVT::f16) - return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), VT); + return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT); else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) - return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); + return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT); else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) - return getConstant(V.bitcastToAPInt().getZExtValue(), VT); + return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); break; } } - // Constant fold unary operations with a vector integer operand. + // Constant fold unary operations with a vector integer or float operand. if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) { if (BV->isConstant()) { switch (Opcode) { @@ -2776,18 +2893,55 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, // FIXME: Entirely reasonable to perform folding of other unary // operations here as the need arises. break; + case ISD::FNEG: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FFLOOR: + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { + EVT SVT = VT.getScalarType(); + EVT InVT = BV->getValueType(0); + EVT InSVT = InVT.getScalarType(); + + // Find legal integer scalar type for constant promotion and + // ensure that its scalar size is at least as large as source. + EVT LegalSVT = SVT; + if (SVT.isInteger()) { + LegalSVT = TLI->getTypeToTransformTo(*getContext(), SVT); + if (LegalSVT.bitsLT(SVT)) break; + } + + // Let the above scalar folding handle the folding of each element. SmallVector<SDValue, 8> Ops; for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { SDValue OpN = BV->getOperand(i); - // Let the above scalar folding handle the conversion of each - // element. - OpN = getNode(ISD::SINT_TO_FP, DL, VT.getVectorElementType(), - OpN); + EVT OpVT = OpN.getValueType(); + + // Build vector (integer) scalar operands may need implicit + // truncation - do this before constant folding. + if (OpVT.isInteger() && OpVT.bitsGT(InSVT)) + OpN = getNode(ISD::TRUNCATE, DL, InSVT, OpN); + + OpN = getNode(Opcode, DL, SVT, OpN); + + // Legalize the (integer) scalar constant if necessary. 
+ if (LegalSVT != SVT) + OpN = getNode(ISD::ANY_EXTEND, DL, LegalSVT, OpN); + + if (OpN.getOpcode() != ISD::UNDEF && + OpN.getOpcode() != ISD::Constant && + OpN.getOpcode() != ISD::ConstantFP) + break; Ops.push_back(OpN); } - return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + if (Ops.size() == VT.getVectorNumElements()) + return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + break; } } } @@ -2825,7 +2979,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); else if (OpOpcode == ISD::UNDEF) // sext(undef) = 0, because the top bits will all be the same. - return getConstant(0, VT); + return getConstant(0, DL, VT); break; case ISD::ZERO_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2842,7 +2996,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, Operand.getNode()->getOperand(0)); else if (OpOpcode == ISD::UNDEF) // zext(undef) = 0, because the top bits will be zero. - return getConstant(0, VT); + return getConstant(0, DL, VT); break; case ISD::ANY_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2941,7 +3095,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDValue Ops[1] = { Operand }; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), @@ -2956,7 +3110,54 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return SDValue(N, 0); } -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, +static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, + const APInt &C2) { + switch (Opcode) { + case ISD::ADD: return std::make_pair(C1 + C2, true); + case ISD::SUB: return std::make_pair(C1 - C2, true); + case ISD::MUL: return std::make_pair(C1 * C2, true); + case ISD::AND: return std::make_pair(C1 & C2, true); + case ISD::OR: return std::make_pair(C1 | C2, true); + case ISD::XOR: return std::make_pair(C1 ^ C2, true); + case ISD::SHL: return std::make_pair(C1 << C2, true); + case ISD::SRL: return std::make_pair(C1.lshr(C2), true); + case ISD::SRA: return std::make_pair(C1.ashr(C2), true); + case ISD::ROTL: return std::make_pair(C1.rotl(C2), true); + case ISD::ROTR: return std::make_pair(C1.rotr(C2), true); + case ISD::UDIV: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.udiv(C2), true); + case ISD::UREM: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.urem(C2), true); + case ISD::SDIV: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.sdiv(C2), true); + case ISD::SREM: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.srem(C2), true); + } + return std::make_pair(APInt(1, 0), false); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, + const ConstantSDNode *Cst1, + const ConstantSDNode *Cst2) { + if (Cst1->isOpaque() || Cst2->isOpaque()) + return SDValue(); + + std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(), + Cst2->getAPIntValue()); + if (!Folded.second) + return SDValue(); + return getConstant(Folded.first, DL, VT); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, SDNode *Cst1, SDNode *Cst2) { // If the opcode is a target-specific ISD node, there's nothing we can // do here and the operand rules may not line up with the below, so @@ -2964,116 +3165,59 @@ SDValue 
SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, if (Opcode >= ISD::BUILTIN_OP_END) return SDValue(); - SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs; - SmallVector<SDValue, 4> Outputs; - EVT SVT = VT.getScalarType(); + // Handle the case of two scalars. + if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) { + if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) { + if (SDValue Folded = + FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2)) { + if (!VT.isVector()) + return Folded; + SmallVector<SDValue, 4> Outputs; + // We may have a vector type but a scalar result. Create a splat. + Outputs.resize(VT.getVectorNumElements(), Outputs.back()); + // Build a big vector out of the scalar elements we generated. + return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); + } else { + return SDValue(); + } + } + } - ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1); - ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2); - if (Scalar1 && Scalar2 && (Scalar1->isOpaque() || Scalar2->isOpaque())) + // For vectors extract each constant element into Inputs so we can constant + // fold them individually. + BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); + BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); + if (!BV1 || !BV2) return SDValue(); - if (Scalar1 && Scalar2) - // Scalar instruction. - Inputs.push_back(std::make_pair(Scalar1, Scalar2)); - else { - // For vectors extract each constant element into Inputs so we can constant - // fold them individually. - BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); - BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); - if (!BV1 || !BV2) - return SDValue(); - - assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); - - for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { - ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I)); - ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I)); - if (!V1 || !V2) // Not a constant, bail. - return SDValue(); + assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); - if (V1->isOpaque() || V2->isOpaque()) - return SDValue(); - - // Avoid BUILD_VECTOR nodes that perform implicit truncation. - // FIXME: This is valid and could be handled by truncating the APInts. - if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) - return SDValue(); + EVT SVT = VT.getScalarType(); + SmallVector<SDValue, 4> Outputs; + for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { + ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I)); + ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I)); + if (!V1 || !V2) // Not a constant, bail. + return SDValue(); - Inputs.push_back(std::make_pair(V1, V2)); - } - } + if (V1->isOpaque() || V2->isOpaque()) + return SDValue(); - // We have a number of constant values, constant fold them element by element. - for (unsigned I = 0, E = Inputs.size(); I != E; ++I) { - const APInt &C1 = Inputs[I].first->getAPIntValue(); - const APInt &C2 = Inputs[I].second->getAPIntValue(); + // Avoid BUILD_VECTOR nodes that perform implicit truncation. + // FIXME: This is valid and could be handled by truncating the APInts. 
+ if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) + return SDValue(); - switch (Opcode) { - case ISD::ADD: - Outputs.push_back(getConstant(C1 + C2, SVT)); - break; - case ISD::SUB: - Outputs.push_back(getConstant(C1 - C2, SVT)); - break; - case ISD::MUL: - Outputs.push_back(getConstant(C1 * C2, SVT)); - break; - case ISD::UDIV: - if (!C2.getBoolValue()) - return SDValue(); - Outputs.push_back(getConstant(C1.udiv(C2), SVT)); - break; - case ISD::UREM: - if (!C2.getBoolValue()) - return SDValue(); - Outputs.push_back(getConstant(C1.urem(C2), SVT)); - break; - case ISD::SDIV: - if (!C2.getBoolValue()) - return SDValue(); - Outputs.push_back(getConstant(C1.sdiv(C2), SVT)); - break; - case ISD::SREM: - if (!C2.getBoolValue()) - return SDValue(); - Outputs.push_back(getConstant(C1.srem(C2), SVT)); - break; - case ISD::AND: - Outputs.push_back(getConstant(C1 & C2, SVT)); - break; - case ISD::OR: - Outputs.push_back(getConstant(C1 | C2, SVT)); - break; - case ISD::XOR: - Outputs.push_back(getConstant(C1 ^ C2, SVT)); - break; - case ISD::SHL: - Outputs.push_back(getConstant(C1 << C2, SVT)); - break; - case ISD::SRL: - Outputs.push_back(getConstant(C1.lshr(C2), SVT)); - break; - case ISD::SRA: - Outputs.push_back(getConstant(C1.ashr(C2), SVT)); - break; - case ISD::ROTL: - Outputs.push_back(getConstant(C1.rotl(C2), SVT)); - break; - case ISD::ROTR: - Outputs.push_back(getConstant(C1.rotr(C2), SVT)); - break; - default: + // Fold one vector element. + std::pair<APInt, bool> Folded = FoldValue(Opcode, V1->getAPIntValue(), + V2->getAPIntValue()); + if (!Folded.second) return SDValue(); - } + Outputs.push_back(getConstant(Folded.first, DL, SVT)); } - assert((Scalar1 && Scalar2) || (VT.getVectorNumElements() == Outputs.size() && - "Expected a scalar or vector!")); - - // Handle the scalar case first. - if (!VT.isVector()) - return Outputs.back(); + assert(VT.getVectorNumElements() == Outputs.size() && + "Vector size mismatch!"); // We may have a vector type but a scalar result. Create a splat. Outputs.resize(VT.getVectorNumElements(), Outputs.back()); @@ -3109,6 +3253,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); + + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + EVT SVT = VT.getScalarType(); + for (SDValue Op : Elts) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + if (SVT.bitsGT(VT.getScalarType())) + for (SDValue &Op : Elts) + Op = TLI->isZExtFree(Op.getValueType(), SVT) + ? 
getZExtOrTrunc(Op, DL, SVT) + : getSExtOrTrunc(Op, DL, SVT); + return getNode(ISD::BUILD_VECTOR, DL, VT, Elts); } break; @@ -3273,12 +3429,35 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, assert(EVT.bitsLE(VT) && "Not extending!"); if (EVT == VT) return N1; // Not actually extending + auto SignExtendInReg = [&](APInt Val) { + unsigned FromBits = EVT.getScalarType().getSizeInBits(); + Val <<= Val.getBitWidth() - FromBits; + Val = Val.ashr(Val.getBitWidth() - FromBits); + return getConstant(Val, DL, VT.getScalarType()); + }; + if (N1C) { APInt Val = N1C->getAPIntValue(); - unsigned FromBits = EVT.getScalarType().getSizeInBits(); - Val <<= Val.getBitWidth()-FromBits; - Val = Val.ashr(Val.getBitWidth()-FromBits); - return getConstant(Val, VT); + return SignExtendInReg(Val); + } + if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) { + SmallVector<SDValue, 8> Ops; + for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { + SDValue Op = N1.getOperand(i); + if (Op.getValueType() != VT.getScalarType()) break; + if (Op.getOpcode() == ISD::UNDEF) { + Ops.push_back(Op); + continue; + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getNode())) { + APInt Val = C->getAPIntValue(); + Ops.push_back(SignExtendInReg(Val)); + continue; + } + break; + } + if (Ops.size() == VT.getVectorNumElements()) + return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } break; } @@ -3287,6 +3466,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (N1.getOpcode() == ISD::UNDEF) return getUNDEF(VT); + // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF + if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements()) + return getUNDEF(VT); + // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is // expanding copies of large vectors from registers. if (N2C && @@ -3296,7 +3479,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, N1.getOperand(0).getValueType().getVectorNumElements(); return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(N2C->getZExtValue() / Factor), - getConstant(N2C->getZExtValue() % Factor, + getConstant(N2C->getZExtValue() % Factor, DL, N2.getValueType())); } @@ -3353,7 +3536,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, unsigned ElementSize = VT.getSizeInBits(); unsigned Shift = ElementSize * N2C->getZExtValue(); APInt ShiftedVal = C->getAPIntValue().lshr(Shift); - return getConstant(ShiftedVal.trunc(ElementSize), VT); + return getConstant(ShiftedVal.trunc(ElementSize), DL, VT); } break; case ISD::EXTRACT_SUBVECTOR: { @@ -3384,7 +3567,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, // Perform trivial constant folding. if (SDValue SV = - FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode())) + FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode())) return SV; // Canonicalize constant to RHS if commutative. 
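As an illustration of the SignExtendInReg lambda introduced in the SIGN_EXTEND_INREG hunk above: it folds the node on constants with the classic shift-pair idiom, moving the FromBits-wide field to the top of the word and arithmetic-shifting it back down so the field's sign bit fills every higher bit. A minimal sketch of the same idiom, assuming plain 64-bit arithmetic instead of APInt (the function name is illustrative, not from the patch):

#include <cassert>
#include <cstdint>

// Sign-extend the low FromBits of Val to a full 64-bit value.
// Mirrors the Val <<= W - FromBits; Val = Val.ashr(W - FromBits)
// pair in the SignExtendInReg lambda.
int64_t signExtendInReg(uint64_t Val, unsigned FromBits) {
  assert(FromBits >= 1 && FromBits <= 64 && "invalid field width");
  unsigned Shift = 64 - FromBits;
  return (int64_t)(Val << Shift) >> Shift;
}

For example, signExtendInReg(0xFF, 8) yields -1 and signExtendInReg(0x7F, 8) yields 127, matching the constants the fold would produce.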
@@ -3409,35 +3592,35 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::FADD: s = V1.add(V2, APFloat::rmNearestTiesToEven); if (!HasFPExceptions || s != APFloat::opInvalidOp) - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); break; case ISD::FSUB: s = V1.subtract(V2, APFloat::rmNearestTiesToEven); if (!HasFPExceptions || s!=APFloat::opInvalidOp) - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); break; case ISD::FMUL: s = V1.multiply(V2, APFloat::rmNearestTiesToEven); if (!HasFPExceptions || s!=APFloat::opInvalidOp) - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); break; case ISD::FDIV: s = V1.divide(V2, APFloat::rmNearestTiesToEven); if (!HasFPExceptions || (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)) { - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); } break; case ISD::FREM : s = V1.mod(V2, APFloat::rmNearestTiesToEven); if (!HasFPExceptions || (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)) { - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); } break; case ISD::FCOPYSIGN: V1.copySign(V2); - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); default: break; } } @@ -3449,7 +3632,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, // FIXME need to be more flexible about rounding mode. (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); } } @@ -3474,7 +3657,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::SRL: case ISD::SHL: if (!VT.isVector()) - return getConstant(0, VT); // fold op(undef, arg2) -> 0 + return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0 // For vectors, we can't easily build an all zero vector, just return // the LHS. return N2; @@ -3489,7 +3672,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (N1.getOpcode() == ISD::UNDEF) // Handle undef ^ undef -> 0 special case. This is a common // idiom (misuse). - return getConstant(0, VT); + return getConstant(0, DL, VT); // fallthrough case ISD::ADD: case ISD::ADDC: @@ -3513,13 +3696,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::SRL: case ISD::SHL: if (!VT.isVector()) - return getConstant(0, VT); // fold op(arg1, undef) -> 0 + return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0 // For vectors, we can't easily build an all zero vector, just return // the LHS. return N1; case ISD::OR: if (!VT.isVector()) - return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT); // For vectors, we can't easily build an all one vector, just return // the LHS. 
return N1; @@ -3539,14 +3722,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (BinOpHasFlags) AddBinaryNodeIDCustom(ID, Opcode, nuw, nsw, exact); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); CSEMap.InsertNode(N, IP); } else { - N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); } @@ -3569,8 +3751,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, const APFloat &V3 = N3CFP->getValueAPF(); APFloat::opStatus s = V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); - if (s != APFloat::opInvalidOp) - return getConstantFP(V1, VT); + if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp) + return getConstantFP(V1, DL, VT); } break; } @@ -3643,7 +3825,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), @@ -3705,16 +3887,32 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, assert(C->getAPIntValue().getBitWidth() == 8); APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); if (VT.isInteger()) - return DAG.getConstant(Val, VT); - return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT); + return DAG.getConstant(Val, dl, VT); + return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl, + VT); } - Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); + assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?"); + EVT IntVT = VT.getScalarType(); + if (!IntVT.isInteger()) + IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits()); + + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value); if (NumBits > 8) { // Use a multiplication with 0x010101... to extend the input to the // required length. APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); - Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT)); + Value = DAG.getNode(ISD::MUL, dl, IntVT, Value, + DAG.getConstant(Magic, dl, IntVT)); + } + + if (VT != Value.getValueType() && !VT.isInteger()) + Value = DAG.getNode(ISD::BITCAST, dl, VT.getScalarType(), Value); + if (VT != Value.getValueType()) { + assert(VT.getVectorElementType() == Value.getValueType() && + "value type should be one vector element here"); + SmallVector<SDValue, 8> BVOps(VT.getVectorNumElements(), Value); + Value = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BVOps); } return Value; @@ -3728,15 +3926,16 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, // Handle vector with all elements zero. if (Str.empty()) { if (VT.isInteger()) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, dl, VT); else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) - return DAG.getConstantFP(0.0, VT); + return DAG.getConstantFP(0.0, dl, VT); else if (VT.isVector()) { unsigned NumElts = VT.getVectorNumElements(); MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? 
MVT::i32 : MVT::i64; return DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(), - EltVT, NumElts))); + DAG.getConstant(0, dl, + EVT::getVectorVT(*DAG.getContext(), + EltVT, NumElts))); } else llvm_unreachable("Expected type!"); } @@ -3759,7 +3958,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, // of a load, then it is cost effective to turn the load into the immediate. Type *Ty = VT.getTypeForEVT(*DAG.getContext()); if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty)) - return DAG.getConstant(Val, VT); + return DAG.getConstant(Val, dl, VT); return SDValue(nullptr, 0); } @@ -3769,7 +3968,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl, SelectionDAG &DAG) { EVT VT = Base.getValueType(); return DAG.getNode(ISD::ADD, dl, - VT, Base, DAG.getConstant(Offset, VT)); + VT, Base, DAG.getConstant(Offset, dl, VT)); } /// isMemSrcFromString - Returns true if memcpy source is a string constant. @@ -3918,9 +4117,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = - MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4033,8 +4230,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4128,8 +4324,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4198,7 +4393,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, + bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4219,11 +4414,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memcpy with target-specific // code. If the target chooses to do this, this is the next best. 
- SDValue Result = - TSI->EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, - isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo); - if (Result.getNode()) - return Result; + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemcpy( + *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, + DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } // If we really need inline code and the target declined to provide it, // use a (potentially long) sequence of loads and stores. @@ -4254,15 +4451,16 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); - std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + .setDiscardResult() + .setTailCall(isTailCall); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVol, + unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4285,10 +4483,12 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memmove with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = TSI->EmitTargetCodeForMemmove( - *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); - if (Result.getNode()) - return Result; + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemmove( + *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. @@ -4307,15 +4507,16 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); - std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + .setDiscardResult() + .setTailCall(isTailCall); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVol, + unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4337,10 +4538,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memset with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = TSI->EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, - Size, Align, isVol, DstPtrInfo); - if (Result.getNode()) - return Result; + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemset( + *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo); + if (Result.getNode()) + return Result; + } // Emit a library call. 
Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext()); @@ -4362,7 +4565,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); + .setDiscardResult() + .setTailCall(isTailCall); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -4379,7 +4583,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4584,7 +4788,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4685,10 +4889,10 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, assert(VT.isInteger() == MemVT.isInteger() && "Cannot convert from FP to Int or Int -> FP!"); assert(VT.isVector() == MemVT.isVector() && - "Cannot use trunc store to convert to or from a vector!"); + "Cannot use an ext load to convert to or from a vector!"); assert((!VT.isVector() || VT.getVectorNumElements() == MemVT.getVectorNumElements()) && - "Cannot use trunc store to change the number of vector elements!"); + "Cannot use an ext load to change the number of vector elements!"); } bool Indexed = AM != ISD::UNINDEXED; @@ -4706,7 +4910,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4814,7 +5018,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4883,7 +5087,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4909,7 +5113,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, ID.AddInteger(ST->getRawSubclassData()); ID.AddInteger(ST->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), @@ -4938,7 +5142,7 @@ SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue 
Chain, MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<MaskedLoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4965,7 +5169,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4977,11 +5181,60 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, return SDValue(N, 0); } +SDValue +SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl, + ArrayRef<SDValue> Ops, + MachineMemOperand *MMO) { + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED, + MMO->isVolatile(), + MMO->isNonTemporal(), + MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + MaskedGatherSDNode *N = + new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(), + Ops, VTs, VT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, SDLoc dl, + ArrayRef<SDValue> Ops, + MachineMemOperand *MMO) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal(), + MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + cast<MaskedScatterSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = + new (NodeAllocator) MaskedScatterSDNode(dl.getIROrder(), dl.getDebugLoc(), + Ops, VTs, VT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, SDValue SV, unsigned Align) { - SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) }; + SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) }; return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops); } @@ -5041,7 +5294,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), @@ -5096,7 +5349,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); if (NumOps == 1) { @@ -5340,17 +5593,9 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { assert(N->getNumOperands() == NumOps && "Update with wrong 
number of operands"); - // Check to see if there is no change. - bool AnyChange = false; - for (unsigned i = 0; i != NumOps; ++i) { - if (Ops[i] != N->getOperand(i)) { - AnyChange = true; - break; - } - } - - // No operands changed, just return the input node. - if (!AnyChange) return N; + // If no operands changed just return the input node. + if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin())) + return N; // See if the modified node already exists. void *InsertPos = nullptr; @@ -5498,8 +5743,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, /// For IROrder, we keep the smaller of the two SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { DebugLoc NLoc = N->getDebugLoc(); - if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && - (OLoc.getDebugLoc() != NLoc)) { + if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) { N->setDebugLoc(DebugLoc()); } unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder()); @@ -5531,7 +5775,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops); - if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *ON = FindNodeOrInsertPos(ID, N->getDebugLoc(), IP)) return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N)); } @@ -5737,7 +5981,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, FoldingSetNodeID ID; AddNodeIDNode(ID, ~Opcode, VTs, OpsArray); IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) { return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL)); } } @@ -5769,7 +6013,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand) { - SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, Operand, SRIdxVal); return SDValue(Subreg, 0); @@ -5780,7 +6024,7 @@ SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand, SDValue Subreg) { - SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, Operand, Subreg, SRIdxVal); return SDValue(Result, 0); @@ -5797,7 +6041,7 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, if (isBinOpWithFlags(Opcode)) AddBinaryNodeIDCustom(ID, nuw, nsw, exact); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) return E; } return nullptr; @@ -5809,21 +6053,28 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool IsIndirect, uint64_t Off, DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) + SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O); } /// Constant SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t Off, 
DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(Var, Expr, C, Off, DL, O); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O); } /// FrameIndex SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t Off, DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(Var, Expr, FI, Off, DL, O); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O); } namespace { @@ -6489,7 +6740,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - getConstant(i, TLI->getVectorIdxTy())); + getConstant(i, dl, TLI->getVectorIdxTy())); } else { // A scalar operand; just use it as is. Operands[j] = Operand; @@ -6595,8 +6846,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); - llvm::computeKnownBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, - TLI->getDataLayout()); + llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne, + *TLI->getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) @@ -6652,9 +6903,10 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, "More vector elements requested than available!"); SDValue Lo, Hi; Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, - getConstant(0, TLI->getVectorIdxTy())); + getConstant(0, DL, TLI->getVectorIdxTy())); Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, - getConstant(LoVT.getVectorNumElements(), TLI->getVectorIdxTy())); + getConstant(LoVT.getVectorNumElements(), DL, + TLI->getVectorIdxTy())); return std::make_pair(Lo, Hi); } @@ -6670,7 +6922,7 @@ void SelectionDAG::ExtractVectorElements(SDValue Op, SDLoc SL(Op); for (unsigned i = Start, e = Start + Count; i != e; ++i) { Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, - Op, getConstant(i, IdxTy))); + Op, getConstant(i, SL, IdxTy))); } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index fe9e442..85303d2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -20,8 +20,8 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -57,7 +58,6 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" -#include 
"llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSelectionDAGInfo.h" @@ -161,7 +161,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, - DAG.getConstant(Lo.getValueType().getSizeInBits(), + DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, TLI.getPointerTy())); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); @@ -208,7 +208,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, // FP_ROUND's are always exact here. if (ValueVT.bitsLT(Val.getValueType())) return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, - DAG.getTargetConstant(1, TLI.getPointerTy())); + DAG.getTargetConstant(1, DL, TLI.getPointerTy())); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } @@ -301,7 +301,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); } // Vector/Vector bitcast. @@ -425,7 +425,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, unsigned RoundBits = RoundParts * PartBits; unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, - DAG.getIntPtrConstant(RoundBits)); + DAG.getIntPtrConstant(RoundBits, DL)); getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); if (TLI.isBigEndian()) @@ -452,9 +452,9 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, SDValue &Part1 = Parts[i+StepSize/2]; Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, - ThisVT, Part0, DAG.getIntPtrConstant(1)); + ThisVT, Part0, DAG.getIntPtrConstant(1, DL)); Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, - ThisVT, Part0, DAG.getIntPtrConstant(0)); + ThisVT, Part0, DAG.getIntPtrConstant(0, DL)); if (ThisBits == PartBits && ThisVT != PartVT) { Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); @@ -493,7 +493,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, SmallVector<SDValue, 16> Ops; for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ElementVT, Val, DAG.getConstant(i, + ElementVT, Val, DAG.getConstant(i, DL, TLI.getVectorIdxTy()))); for (unsigned i = ValueVT.getVectorNumElements(), @@ -520,7 +520,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy())); + PartVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); bool Smaller = ValueVT.bitsLE(PartVT); Val = DAG.getNode((Smaller ? 
ISD::TRUNCATE : ISD::ANY_EXTEND), @@ -550,12 +551,12 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, if (IntermediateVT.isVector()) Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getConstant(i * (NumElements / NumIntermediates), + DAG.getConstant(i * (NumElements / NumIntermediates), DL, TLI.getVectorIdxTy())); else Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, DL, TLI.getVectorIdxTy())); } // Split the intermediate operands into legal parts. @@ -576,93 +577,25 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, } } -namespace { - /// RegsForValue - This struct represents the registers (physical or virtual) - /// that a particular set of values is assigned, and the type information - /// about the value. The most common situation is to represent one value at a - /// time, but struct or array values are handled element-wise as multiple - /// values. The splitting of aggregates is performed recursively, so that we - /// never have aggregate-typed registers. The values at this point do not - /// necessarily have legal types, so each value may require one or more - /// registers of some legal type. - /// - struct RegsForValue { - /// ValueVTs - The value types of the values, which may not be legal, and - /// may need be promoted or synthesized from one or more registers. - /// - SmallVector<EVT, 4> ValueVTs; +RegsForValue::RegsForValue() {} - /// RegVTs - The value types of the registers. This is the same size as - /// ValueVTs and it records, for each value, what the type of the assigned - /// register or registers are. (Individual values are never synthesized - /// from more than one type of register.) - /// - /// With virtual registers, the contents of RegVTs is redundant with TLI's - /// getRegisterType member function, however when with physical registers - /// it is necessary to have a separate record of the types. - /// - SmallVector<MVT, 4> RegVTs; - - /// Regs - This list holds the registers assigned to the values. - /// Each legal or promoted value requires one register, and each - /// expanded value requires multiple registers. - /// - SmallVector<unsigned, 4> Regs; - - RegsForValue() {} - - RegsForValue(const SmallVector<unsigned, 4> ®s, - MVT regvt, EVT valuevt) - : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - - RegsForValue(LLVMContext &Context, const TargetLowering &tli, - unsigned Reg, Type *Ty) { - ComputeValueVTs(tli, Ty, ValueVTs); - - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); - MVT RegisterVT = tli.getRegisterType(Context, ValueVT); - for (unsigned i = 0; i != NumRegs; ++i) - Regs.push_back(Reg + i); - RegVTs.push_back(RegisterVT); - Reg += NumRegs; - } - } +RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, + EVT valuevt) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - /// append - Add the specified values to this one. 
- void append(const RegsForValue &RHS) { - ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); - RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); - Regs.append(RHS.Regs.begin(), RHS.Regs.end()); - } +RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &tli, + unsigned Reg, Type *Ty) { + ComputeValueVTs(tli, Ty, ValueVTs); - /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from - /// this value and returns the result as a ValueVTs value. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - SDLoc dl, - SDValue &Chain, SDValue *Flag, - const Value *V = nullptr) const; - - /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the - /// specified value into the registers specified by this object. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - void - getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, - SDValue *Flag, const Value *V, - ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; - - /// AddInlineAsmOperands - Add this value to the specified inlineasm node - /// operand list. This adds the code marker, matching input operand index - /// (if applicable), and includes the number of values added into it. - void AddInlineAsmOperands(unsigned Kind, - bool HasMatching, unsigned MatchingIdx, - SelectionDAG &DAG, - std::vector<SDValue> &Ops) const; - }; + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); + MVT RegisterVT = tli.getRegisterType(Context, ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(Reg + i); + RegVTs.push_back(RegisterVT); + Reg += NumRegs; + } } /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from @@ -721,7 +654,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // The current value is a zero. // Explicitly express that as it would be easier for // optimizations to kick in. - Parts[i] = DAG.getConstant(0, RegisterVT); + Parts[i] = DAG.getConstant(0, dl, RegisterVT); continue; } @@ -823,7 +756,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, /// operand list. This adds the code marker and includes the number of /// values added into it. 
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, - unsigned MatchingIdx, + unsigned MatchingIdx, SDLoc dl, SelectionDAG &DAG, std::vector<SDValue> &Ops) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -843,7 +776,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); } - SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); + SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); Ops.push_back(Res); unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); @@ -869,7 +802,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - DL = DAG.getSubtarget().getDataLayout(); + DL = DAG.getTarget().getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -997,14 +930,16 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, const DbgValueInst *DI = DDI.getDI(); DebugLoc dl = DDI.getdl(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); - MDNode *Variable = DI->getVariable(); - MDNode *Expr = DI->getExpression(); + DILocalVariable *Variable = DI->getVariable(); + DIExpression *Expr = DI->getExpression(); + assert(Variable->isValidLocationForIntrinsic(dl) && + "Expected inlined-at fields to agree"); uint64_t Offset = DI->getOffset(); // A dbg.value for an alloca is always indirect. bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expr, Offset, IsIndirect, + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect, Val)) { SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), IsIndirect, Offset, dl, DbgSDNodeOrder); @@ -1016,6 +951,24 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, } } +/// getCopyFromRegs - If there was virtual register allocated for the value V +/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. +SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { + DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); + SDValue Result; + + if (It != FuncInfo.ValueMap.end()) { + unsigned InReg = It->second; + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, + Ty); + SDValue Chain = DAG.getEntryNode(); + Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + resolveDanglingDebugInfo(V, Result); + } + + return Result; +} + /// getValue - Return an SDValue for the given Value. SDValue SelectionDAGBuilder::getValue(const Value *V) { // If we already have an SDValue for this value, use it. It's important @@ -1026,15 +979,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { // If there's a virtual register allocated and initialized for this // value, use it. - DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); - if (It != FuncInfo.ValueMap.end()) { - unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, - V->getType()); - SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); - resolveDanglingDebugInfo(V, N); - return N; + SDValue copyFromReg = getCopyFromRegs(V, V->getType()); + if (copyFromReg.getNode()) { + return copyFromReg; } // Otherwise create a new SDValue and remember it. 
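For context on the InferPtrAlignment hunk a few hunks above: once computeKnownBits proves the low bits of a global's address are zero, countTrailingOnes on the known-zero mask gives log2 of the provable alignment, capped at 1 << 31. A small self-contained sketch of that arithmetic, using plain integers instead of APInt (names are illustrative, not LLVM's):

#include <cstdint>
#include <iostream>

// Derive an alignment from a mask of address bits known to be zero:
// the run of trailing known-zero bits is log2 of the alignment.
uint32_t inferAlignment(uint64_t KnownZeroMask) {
  unsigned AlignBits = 0;
  while (AlignBits < 64 && (KnownZeroMask & (1ULL << AlignBits)))
    ++AlignBits;                        // countTrailingOnes equivalent
  if (AlignBits == 0)
    return 0;                           // nothing provable
  if (AlignBits > 31)
    AlignBits = 31;                     // same 1 << min(31U, AlignBits) cap
  return 1U << AlignBits;
}

int main() {
  std::cout << inferAlignment(0xF) << "\n"; // low 4 bits zero -> prints 16
}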
@@ -1044,6 +991,12 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { return Val; } +// Return true if SDValue exists for the given Value +bool SelectionDAGBuilder::findValue(const Value *V) const { + return (NodeMap.find(V) != NodeMap.end()) || + (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end()); +} + /// getNonRegisterValue - Return an SDValue for the given Value, but /// don't look in FuncInfo.ValueMap for a virtual register. SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { @@ -1067,18 +1020,18 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { EVT VT = TLI.getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) - return DAG.getConstant(*CI, VT); + return DAG.getConstant(*CI, getCurSDLoc(), VT); if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); if (isa<ConstantPointerNull>(C)) { unsigned AS = V->getType()->getPointerAddressSpace(); - return DAG.getConstant(0, TLI.getPointerTy(AS)); + return DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(AS)); } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) - return DAG.getConstantFP(*CFP, VT); + return DAG.getConstantFP(*CFP, getCurSDLoc(), VT); if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) return DAG.getUNDEF(VT); @@ -1138,9 +1091,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa<UndefValue>(C)) Constants[i] = DAG.getUNDEF(EltVT); else if (EltVT.isFloatingPoint()) - Constants[i] = DAG.getConstantFP(0, EltVT); + Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else - Constants[i] = DAG.getConstant(0, EltVT); + Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT); } return DAG.getMergeValues(Constants, getCurSDLoc()); @@ -1164,9 +1117,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { SDValue Op; if (EltVT.isFloatingPoint()) - Op = DAG.getConstantFP(0, EltVT); + Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else - Op = DAG.getConstant(0, EltVT); + Op = DAG.getConstant(0, getCurSDLoc(), EltVT); Ops.assign(NumElements, Op); } @@ -1223,7 +1176,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i != NumValues; ++i) { SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), RetPtr.getValueType(), RetPtr, - DAG.getIntPtrConstant(Offsets[i])); + DAG.getIntPtrConstant(Offsets[i], + getCurSDLoc())); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), @@ -1573,19 +1527,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = BrMBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - if (I.isUnconditional()) { // Update machine-CFG edges. BrMBB->addSuccessor(Succ0MBB); // If this is not a fall-through branch or optimizations are switched off, // emit the branch. 
- if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None) + if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); @@ -1674,7 +1622,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, Cond = CondLHS; else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && CB.CC == ISD::SETEQ) { - SDValue True = DAG.getConstant(1, CondLHS.getValueType()); + SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); } else Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); @@ -1682,19 +1630,19 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); - const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); + const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); SDValue CmpOp = getValue(CB.CmpMHS); EVT VT = CmpOp.getValueType(); if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { - Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), + Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT), ISD::SETLE); } else { SDValue SUB = DAG.getNode(ISD::SUB, dl, - VT, CmpOp, DAG.getConstant(Low, VT)); + VT, CmpOp, DAG.getConstant(Low, dl, VT)); Cond = DAG.getSetCC(dl, MVT::i1, SUB, - DAG.getConstant(High-Low, VT), ISD::SETULE); + DAG.getConstant(High-Low, dl, VT), ISD::SETULE); } } @@ -1705,18 +1653,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, if (CB.TrueBB != CB.FalseBB) addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - // If the lhs block is the next block, invert the condition so that we can // fall through to the lhs instead of the rhs block. - if (CB.TrueBB == NextBlock) { + if (CB.TrueBB == NextBlock(SwitchBB)) { std::swap(CB.TrueBB, CB.FalseBB); - SDValue True = DAG.getConstant(1, Cond.getValueType()); + SDValue True = DAG.getConstant(1, dl, Cond.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); } @@ -1752,13 +1693,15 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); + // Subtract the lowest switch case value from the value being switched on and // conditional branch to default mbb if the result is greater than the // difference between smallest and largest cases. SDValue SwitchOp = getValue(JTH.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, - DAG.getConstant(JTH.First, VT)); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, + DAG.getConstant(JTH.First, dl, VT)); // The SDNode we just created, which holds the value being switched on minus // the smallest case value, needs to be copied to a virtual register so it @@ -1766,10 +1709,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. 
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy()); + SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy()); unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1777,24 +1720,18 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // for the switch statement if the value being switched on exceeds the largest // case in the switch. SDValue CMP = - DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT); + DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), + ISD::SETUGT); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - - SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, CMP, DAG.getBasicBlock(JT.Default)); - if (JT.MBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond, + // Avoid emitting unnecessary branches to the next block. + if (JT.MBB != NextBlock(SwitchBB)) + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(JT.MBB)); DAG.setRoot(BrCond); @@ -1824,6 +1761,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); SDValue Guard; + SDLoc dl = getCurSDLoc(); // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the // guard value from the virtual register holding the value. Otherwise, emit a @@ -1831,34 +1769,34 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, unsigned GuardReg = SPD.getGuardReg(); if (GuardReg && TLI.useLoadStackGuardNode()) - Guard = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), GuardReg, + Guard = DAG.getCopyFromReg(DAG.getEntryNode(), dl, GuardReg, PtrTy); else - Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + Guard = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), GuardPtr, MachinePointerInfo(IRGuard, 0), true, false, false, Align); - SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, MachinePointerInfo::getFixedStack(FI), true, false, false, Align); // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); SDValue Cmp = - DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), + DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), Sub.getValueType()), - Sub, DAG.getConstant(0, VT), ISD::SETNE); + Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. 
- SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, StackSlot.getOperand(0), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); // Otherwise branch to success MBB. - SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(), + SDValue Br = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(SPD.getSuccessMBB())); @@ -1886,18 +1824,20 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { /// suitable for "bit tests" void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); + // Subtract the minimum value SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, - DAG.getConstant(B.First, VT)); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, + DAG.getConstant(B.First, dl, VT)); // Check range const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue RangeCmp = - DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); + DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); // Determine the type of the test operands. bool UsePtrType = false; @@ -1914,32 +1854,25 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } if (UsePtrType) { VT = TLI.getPointerTy(); - Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); + Sub = DAG.getZExtOrTrunc(Sub, dl, VT); } B.RegVT = VT.getSimpleVT(); B.Reg = FuncInfo.CreateReg(B.RegVT); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), - B.Reg, Sub); - - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub); MachineBasicBlock* MBB = B.Cases[0].ThisBB; addSuccessorWithWeight(SwitchBB, B.Default); addSuccessorWithWeight(SwitchBB, MBB); - SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, RangeCmp, DAG.getBasicBlock(B.Default)); - if (MBB != NextBlock) - BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo, + // Avoid emitting unnecessary branches to the next block. + if (MBB != NextBlock(SwitchBB)) + BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange, DAG.getBasicBlock(MBB)); DAG.setRoot(BrRange); @@ -1952,34 +1885,33 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); MVT VT = BB.RegVT; - SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), - Reg, VT); + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT); SDValue Cmp; - unsigned PopCount = CountPopulation_64(B.Mask); + unsigned PopCount = countPopulation(B.Mask); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. 
Cmp = DAG.getSetCC( - getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ); + dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. Cmp = DAG.getSetCC( - getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE); + dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), ISD::SETNE); } else { // Make desired shift - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, - DAG.getConstant(1, VT), ShiftOp); + SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT, + DAG.getConstant(1, dl, VT), ShiftOp); // Emit bit tests and jumps - SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), - VT, SwitchVal, DAG.getConstant(B.Mask, VT)); - Cmp = DAG.getSetCC(getCurSDLoc(), - TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, - DAG.getConstant(0, VT), ISD::SETNE); + SDValue AndOp = DAG.getNode(ISD::AND, dl, + VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT)); + Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, + DAG.getConstant(0, dl, VT), ISD::SETNE); } // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. @@ -1987,19 +1919,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); - SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), Cmp, DAG.getBasicBlock(B.TargetBB)); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - - if (NextMBB != NextBlock) - BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd, + // Avoid emitting unnecessary branches to the next block. + if (NextMBB != NextBlock(SwitchBB)) + BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd, DAG.getBasicBlock(NextMBB)); DAG.setRoot(BrAnd); @@ -2027,13 +1953,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { case Intrinsic::experimental_patchpoint_i64: visitPatchpoint(&I, LandingPad); break; + case Intrinsic::experimental_gc_statepoint: + LowerStatepoint(ImmutableStatepoint(&I), LandingPad); + break; } } else LowerCallTo(&I, getValue(Callee), false, LandingPad); // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. - CopyToExportRegsIfNeeded(&I); + // We already took care of the exported value for the statepoint instruction + // during call to the LowerStatepoint. + if (!isStatepoint(I)) { + CopyToExportRegsIfNeeded(&I); + } // Update successor info addSuccessorWithWeight(InvokeMBB, Return); @@ -2065,622 +1998,86 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { return; SmallVector<EVT, 2> ValueVTs; + SDLoc dl = getCurSDLoc(); ComputeValueVTs(TLI, LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); // Get the two live-in registers as SDValues. 
The physregs have already been // copied into virtual registers. SDValue Ops[2]; - Ops[0] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), - FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), - getCurSDLoc(), ValueVTs[0]); + if (FuncInfo.ExceptionPointerVirtReg) { + Ops[0] = DAG.getZExtOrTrunc( + DAG.getCopyFromReg(DAG.getEntryNode(), dl, + FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), + dl, ValueVTs[0]); + } else { + Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy()); + } Ops[1] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + DAG.getCopyFromReg(DAG.getEntryNode(), dl, FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), - getCurSDLoc(), ValueVTs[1]); + dl, ValueVTs[1]); // Merge into one. - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Ops); setValue(&LP, Res); } -/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for -/// small case ranges). -bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock *Default, - MachineBasicBlock *SwitchBB) { - // Size is the number of Cases represented by this range. - size_t Size = CR.Range.second - CR.Range.first; - if (Size > 3) - return false; - - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = CR.CaseBB; - - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - - BranchProbabilityInfo *BPI = FuncInfo.BPI; - // If any two of the cases has the same destination, and if one value - // is the same as the other, but has one bit unset that the other has set, - // use bit manipulation to do two compares at once. For example: - // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" - // TODO: This could be extended to merge any 2 cases in switches with 3 cases. - // TODO: Handle cases where CR.CaseBB != SwitchBB. - if (Size == 2 && CR.CaseBB == SwitchBB) { - Case &Small = *CR.Range.first; - Case &Big = *(CR.Range.second-1); - - if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) { - const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue(); - const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue(); - - // Check that there is only one bit different. - if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && - (SmallValue | BigValue) == BigValue) { - // Isolate the common bit. - APInt CommonBit = BigValue & ~SmallValue; - assert((SmallValue | CommonBit) == BigValue && - CommonBit.countPopulation() == 1 && "Not a common bit?"); - - SDValue CondLHS = getValue(SV); - EVT VT = CondLHS.getValueType(); - SDLoc DL = getCurSDLoc(); - - SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, - DAG.getConstant(CommonBit, VT)); - SDValue Cond = DAG.getSetCC(DL, MVT::i1, - Or, DAG.getConstant(BigValue, VT), - ISD::SETEQ); - - // Update successor info. - // Both Small and Big will jump to Small.BB, so we sum up the weights. - addSuccessorWithWeight(SwitchBB, Small.BB, - Small.ExtraWeight + Big.ExtraWeight); - addSuccessorWithWeight(SwitchBB, Default, - // The default destination is the first successor in IR. - BPI ? 
BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0); - - // Insert the true branch. - SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, - getControlRoot(), Cond, - DAG.getBasicBlock(Small.BB)); - - // Insert the false branch. - BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, - DAG.getBasicBlock(Default)); - - DAG.setRoot(BrCond); - return true; - } - } - } - - // Order cases by weight so the most likely case will be checked first. - uint32_t UnhandledWeights = 0; - if (BPI) { - for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) { - uint32_t IWeight = I->ExtraWeight; - UnhandledWeights += IWeight; - for (CaseItr J = CR.Range.first; J < I; ++J) { - uint32_t JWeight = J->ExtraWeight; - if (IWeight > JWeight) - std::swap(*I, *J); - } - } - } - // Rearrange the case blocks so that the last one falls through if possible. - Case &BackCase = *(CR.Range.second-1); - if (Size > 1 && - NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { - // The last case block won't fall through into 'NextBlock' if we emit the - // branches in this order. See if rearranging a case value would help. - // We start at the bottom as it's the case with the least weight. - for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I) - if (I->BB == NextBlock) { - std::swap(*I, BackCase); - break; - } - } - - // Create a CaseBlock record representing a conditional branch to - // the Case's target mbb if the value being switched on SV is equal - // to C. - MachineBasicBlock *CurBlock = CR.CaseBB; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - MachineBasicBlock *FallThrough; - if (I != E-1) { - FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock()); - CurMF->insert(BBI, FallThrough); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } else { - // If the last case doesn't match, go to the default block. - FallThrough = Default; - } - - const Value *RHS, *LHS, *MHS; - ISD::CondCode CC; - if (I->High == I->Low) { - // This is just small small case range :) containing exactly 1 case - CC = ISD::SETEQ; - LHS = SV; RHS = I->High; MHS = nullptr; - } else { - CC = ISD::SETLE; - LHS = I->Low; MHS = SV; RHS = I->High; - } - - // The false weight should be sum of all un-handled cases. - UnhandledWeights -= I->ExtraWeight; - CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough, - /* me */ CurBlock, - /* trueweight */ I->ExtraWeight, - /* falseweight */ UnhandledWeights); - - // If emitting the first comparison, just call visitSwitchCase to emit the - // code into the current block. Otherwise, push the CaseBlock onto the - // vector to be later processed by SDISel, and insert the node's MBB - // before the next MBB. 
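Aside: the removed handleSmallSwitchRange documents its two-cases-in-one-compare rewrite ("if (X == 6 || X == 4)" -> "if ((X|2) == 6)"). A self-contained sketch of that rewrite under its stated precondition (matchesEither is a hypothetical name):

    #include <cassert>
    #include <cstdint>

    // Valid when Big has exactly one bit set that Small lacks, i.e.
    // popcount(Big) == popcount(Small) + 1 and (Small | Big) == Big.
    bool matchesEither(uint64_t X, uint64_t Small, uint64_t Big) {
      uint64_t CommonBit = Big & ~Small;  // the single differing bit
      assert((Small | CommonBit) == Big && "not a common-bit pair");
      return (X | CommonBit) == Big;      // ISD::OR + ISD::SETEQ
    }

Setting the common bit maps both case values onto Big and no other value onto it, so one compare replaces two.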
- if (CurBlock == SwitchBB) - visitSwitchCase(CB, SwitchBB); - else - SwitchCases.push_back(CB); - - CurBlock = FallThrough; - } - - return true; -} - -static inline bool areJTsAllowed(const TargetLowering &TLI) { - return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); -} - -static APInt ComputeRange(const APInt &First, const APInt &Last) { - uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; - APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); - return (LastExt - FirstExt + 1ULL); -} - -/// handleJTSwitchCase - Emit jumptable for current switch case range -bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, - CaseRecVector &WorkList, - const Value *SV, - MachineBasicBlock *Default, - MachineBasicBlock *SwitchBB) { - Case& FrontCase = *CR.Range.first; - Case& BackCase = *(CR.Range.second-1); - - const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); - - APInt TSize(First.getBitWidth(), 0); - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) - TSize += I->size(); +unsigned +SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, + MachineBasicBlock *LPadBB) { + SDValue Chain = getControlRoot(); + SDLoc dl = getCurSDLoc(); + // Get the typeid that we will dispatch on later. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) - return false; - - APInt Range = ComputeRange(First, Last); - // The density is TSize / Range. Require at least 40%. - // It should not be possible for IntTSize to saturate for sane code, but make - // sure we handle Range saturation correctly. - uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10); - uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10); - if (IntTSize * 10 < IntRange * 4) - return false; - - DEBUG(dbgs() << "Lowering jump table\n" - << "First entry: " << First << ". Last entry: " << Last << '\n' - << "Range: " << Range << ". Size: " << TSize << ".\n\n"); - - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; + const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); + unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); + SDValue Sel = DAG.getConstant(TypeID, dl, TLI.getPointerTy()); + Chain = DAG.getCopyToReg(Chain, dl, VReg, Sel); + + // Branch to the main landing pad block. + MachineBasicBlock *ClauseMBB = FuncInfo.MBB; + ClauseMBB->addSuccessor(LPadBB); + DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, Chain, + DAG.getBasicBlock(LPadBB))); + return VReg; +} + +void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { +#ifndef NDEBUG + for (const CaseCluster &CC : Clusters) + assert(CC.Low == CC.High && "Input clusters must be single-case"); +#endif - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - - // Create a new basic block to hold the code for loading the address - // of the jump table, and jumping to it. Update successor information; - // we will either branch to the default case for the switch, or the jump - // table. 
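Aside: the density gate in the removed handleJTSwitchCase is kept in integer arithmetic. Stripped of the UINT64_MAX/10 saturation the real code applies, it amounts to (denseEnough is a hypothetical helper):

    #include <cstdint>

    // Build a jump table only if the cases cover at least 40% of the
    // value span: TSize / Range >= 0.4, i.e. TSize * 10 >= Range * 4.
    bool denseEnough(uint64_t TSize, uint64_t Range) {
      return TSize * 10 >= Range * 4;
    }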
- MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, JumpTableBB); - - addSuccessorWithWeight(CR.CaseBB, Default); - addSuccessorWithWeight(CR.CaseBB, JumpTableBB); - - // Build a vector of destination BBs, corresponding to each target - // of the jump table. If the value of the jump table slot corresponds to - // a case statement, push the case's BB onto the vector, otherwise, push - // the default BB. - std::vector<MachineBasicBlock*> DestBBs; - APInt TEI = First; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { - const APInt &Low = cast<ConstantInt>(I->Low)->getValue(); - const APInt &High = cast<ConstantInt>(I->High)->getValue(); - - if (Low.sle(TEI) && TEI.sle(High)) { - DestBBs.push_back(I->BB); - if (TEI==High) - ++I; + std::sort(Clusters.begin(), Clusters.end(), + [](const CaseCluster &a, const CaseCluster &b) { + return a.Low->getValue().slt(b.Low->getValue()); + }); + + // Merge adjacent clusters with the same destination. + const unsigned N = Clusters.size(); + unsigned DstIndex = 0; + for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) { + CaseCluster &CC = Clusters[SrcIndex]; + const ConstantInt *CaseVal = CC.Low; + MachineBasicBlock *Succ = CC.MBB; + + if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ && + (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) { + // If this case has the same successor and is a neighbour, merge it into + // the previous cluster. + Clusters[DstIndex - 1].High = CaseVal; + Clusters[DstIndex - 1].Weight += CC.Weight; + assert(Clusters[DstIndex - 1].Weight >= CC.Weight && "Weight overflow!"); } else { - DestBBs.push_back(Default); + std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex], + sizeof(Clusters[SrcIndex])); } } - - // Calculate weight for each unique destination in CR. - DenseMap<MachineBasicBlock*, uint32_t> DestWeights; - if (FuncInfo.BPI) - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = - DestWeights.find(I->BB); - if (Itr != DestWeights.end()) - Itr->second += I->ExtraWeight; - else - DestWeights[I->BB] = I->ExtraWeight; - } - - // Update successor info. Add one edge to each unique successor. - BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); - for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), - E = DestBBs.end(); I != E; ++I) { - if (!SuccsHandled[(*I)->getNumber()]) { - SuccsHandled[(*I)->getNumber()] = true; - DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = - DestWeights.find(*I); - addSuccessorWithWeight(JumpTableBB, *I, - Itr != DestWeights.end() ? Itr->second : 0); - } - } - - // Create a jump table index for this jump table. - unsigned JTEncoding = TLI.getJumpTableEncoding(); - unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) - ->createJumpTableIndex(DestBBs); - - // Set the jump table information so that we can codegen it as a second - // MachineBasicBlock - JumpTable JT(-1U, JTI, JumpTableBB, Default); - JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB)); - if (CR.CaseBB == SwitchBB) - visitJumpTableHeader(JT, JTH, SwitchBB); - - JTCases.push_back(JumpTableBlock(JTH, JT)); - return true; -} - -/// handleBTSplitSwitchCase - emit comparison and split binary search tree into -/// 2 subtrees. 
-bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock* SwitchBB) { - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; - - Case& FrontCase = *CR.Range.first; - Case& BackCase = *(CR.Range.second-1); - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - - // Size is the number of Cases represented by this range. - unsigned Size = CR.Range.second - CR.Range.first; - - const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); - double FMetric = 0; - CaseItr Pivot = CR.Range.first + Size/2; - - // Select optimal pivot, maximizing sum density of LHS and RHS. This will - // (heuristically) allow us to emit JumpTable's later. - APInt TSize(First.getBitWidth(), 0); - for (CaseItr I = CR.Range.first, E = CR.Range.second; - I!=E; ++I) - TSize += I->size(); - - APInt LSize = FrontCase.size(); - APInt RSize = TSize-LSize; - DEBUG(dbgs() << "Selecting best pivot: \n" - << "First: " << First << ", Last: " << Last <<'\n' - << "LSize: " << LSize << ", RSize: " << RSize << '\n'); - for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; - J!=E; ++I, ++J) { - const APInt &LEnd = cast<ConstantInt>(I->High)->getValue(); - const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue(); - APInt Range = ComputeRange(LEnd, RBegin); - assert((Range - 2ULL).isNonNegative() && - "Invalid case distance"); - // Use volatile double here to avoid excess precision issues on some hosts, - // e.g. that use 80-bit X87 registers. - volatile double LDensity = - (double)LSize.roundToDouble() / - (LEnd - First + 1ULL).roundToDouble(); - volatile double RDensity = - (double)RSize.roundToDouble() / - (Last - RBegin + 1ULL).roundToDouble(); - volatile double Metric = Range.logBase2()*(LDensity+RDensity); - // Should always split in some non-trivial place - DEBUG(dbgs() <<"=>Step\n" - << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' - << "LDensity: " << LDensity - << ", RDensity: " << RDensity << '\n' - << "Metric: " << Metric << '\n'); - if (FMetric < Metric) { - Pivot = J; - FMetric = Metric; - DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n'); - } - - LSize += J->size(); - RSize -= J->size(); - } - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (areJTsAllowed(TLI)) { - // If our case is dense we *really* should handle it earlier! - assert((FMetric > 0) && "Should handle dense range earlier!"); - } else { - Pivot = CR.Range.first + Size/2; - } - - CaseRange LHSR(CR.Range.first, Pivot); - CaseRange RHSR(Pivot, CR.Range.second); - const Constant *C = Pivot->Low; - MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr; - - // We know that we branch to the LHS if the Value being switched on is - // less than the Pivot value, C. We use this to optimize our binary - // tree a bit, by recognizing that if SV is greater than or equal to the - // LHS's Case Value, and that Case Value is exactly one less than the - // Pivot's Value, then we can branch directly to the LHS's Target, - // rather than creating a leaf node for it. 
- if ((LHSR.second - LHSR.first) == 1 && - LHSR.first->High == CR.GE && - cast<ConstantInt>(C)->getValue() == - (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) { - TrueBB = LHSR.first->BB; - } else { - TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, TrueBB); - WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR)); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } - - // Similar to the optimization above, if the Value being switched on is - // known to be less than the Constant CR.LT, and the current Case Value - // is CR.LT - 1, then we can branch directly to the target block for - // the current Case Value, rather than emitting a RHS leaf node for it. - if ((RHSR.second - RHSR.first) == 1 && CR.LT && - cast<ConstantInt>(RHSR.first->Low)->getValue() == - (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) { - FalseBB = RHSR.first->BB; - } else { - FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, FalseBB); - WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR)); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } - - // Create a CaseBlock record representing a conditional branch to - // the LHS node if the value being switched on SV is less than C. - // Otherwise, branch to LHS. - CaseBlock CB(ISD::SETLT, SV, C, nullptr, TrueBB, FalseBB, CR.CaseBB); - - if (CR.CaseBB == SwitchBB) - visitSwitchCase(CB, SwitchBB); - else - SwitchCases.push_back(CB); - - return true; -} - -/// handleBitTestsSwitchCase - if current case range has few destination and -/// range span less, than machine word bitwidth, encode case range into series -/// of masks and emit bit tests with these masks. -bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock* Default, - MachineBasicBlock* SwitchBB) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PTy = TLI.getPointerTy(); - unsigned IntPtrBits = PTy.getSizeInBits(); - - Case& FrontCase = *CR.Range.first; - Case& BackCase = *(CR.Range.second-1); - - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // If target does not have legal shift left, do not emit bit tests at all. - if (!TLI.isOperationLegal(ISD::SHL, PTy)) - return false; - - size_t numCmps = 0; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - // Single case counts one, case range - two. - numCmps += (I->Low == I->High ? 1 : 2); - } - - // Count unique destinations - SmallSet<MachineBasicBlock*, 4> Dests; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - Dests.insert(I->BB); - if (Dests.size() > 3) - // Don't bother the code below, if there are too much unique destinations - return false; - } - DEBUG(dbgs() << "Total number of unique destinations: " - << Dests.size() << '\n' - << "Total number of comparisons: " << numCmps << '\n'); - - // Compute span of values. 
- const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue(); - APInt cmpRange = maxValue - minValue; - - DEBUG(dbgs() << "Compare range: " << cmpRange << '\n' - << "Low bound: " << minValue << '\n' - << "High bound: " << maxValue << '\n'); - - if (cmpRange.uge(IntPtrBits) || - (!(Dests.size() == 1 && numCmps >= 3) && - !(Dests.size() == 2 && numCmps >= 5) && - !(Dests.size() >= 3 && numCmps >= 6))) - return false; - - DEBUG(dbgs() << "Emitting bit tests\n"); - APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth()); - - // Optimize the case where all the case values fit in a - // word without having to subtract minValue. In this case, - // we can optimize away the subtraction. - if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { - cmpRange = maxValue; - } else { - lowBound = minValue; - } - - CaseBitsVector CasesBits; - unsigned i, count = 0; - - for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { - MachineBasicBlock* Dest = I->BB; - for (i = 0; i < count; ++i) - if (Dest == CasesBits[i].BB) - break; - - if (i == count) { - assert((count < 3) && "Too much destinations to test!"); - CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/)); - count++; - } - - const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue(); - const APInt& highValue = cast<ConstantInt>(I->High)->getValue(); - - uint64_t lo = (lowValue - lowBound).getZExtValue(); - uint64_t hi = (highValue - lowBound).getZExtValue(); - CasesBits[i].ExtraWeight += I->ExtraWeight; - - for (uint64_t j = lo; j <= hi; j++) { - CasesBits[i].Mask |= 1ULL << j; - CasesBits[i].Bits++; - } - - } - std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp()); - - BitTestInfo BTC; - - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; - - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - - DEBUG(dbgs() << "Cases:\n"); - for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) { - DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask - << ", Bits: " << CasesBits[i].Bits - << ", BB: " << CasesBits[i].BB << '\n'); - - MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, CaseBB); - BTC.push_back(BitTestCase(CasesBits[i].Mask, - CaseBB, - CasesBits[i].BB, CasesBits[i].ExtraWeight)); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } - - BitTestBlock BTB(lowBound, cmpRange, SV, - -1U, MVT::Other, (CR.CaseBB == SwitchBB), - CR.CaseBB, Default, std::move(BTC)); - - if (CR.CaseBB == SwitchBB) - visitBitTestHeader(BTB, SwitchBB); - - BitTestCases.push_back(std::move(BTB)); - - return true; -} - -/// Clusterify - Transform simple list of Cases into list of CaseRange's -void SelectionDAGBuilder::Clusterify(CaseVector& Cases, - const SwitchInst& SI) { - BranchProbabilityInfo *BPI = FuncInfo.BPI; - // Start with "simple" cases. - for (SwitchInst::ConstCaseIt i : SI.cases()) { - const BasicBlock *SuccBB = i.getCaseSuccessor(); - MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - - uint32_t ExtraWeight = - BPI ? 
BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0; - - Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), - SMBB, ExtraWeight)); - } - std::sort(Cases.begin(), Cases.end(), CaseCmp()); - - // Merge case into clusters - if (Cases.size() >= 2) - // Must recompute end() each iteration because it may be - // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); - J != Cases.end(); ) { - const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); - const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); - MachineBasicBlock* nextBB = J->BB; - MachineBasicBlock* currentBB = I->BB; - - // If the two neighboring cases go to the same destination, merge them - // into a single case. - if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { - I->High = J->High; - I->ExtraWeight += J->ExtraWeight; - J = Cases.erase(J); - } else { - I = J++; - } - } - - DEBUG({ - size_t numCmps = 0; - for (auto &I : Cases) - // A range counts double, since it requires two compares. - numCmps += I.Low != I.High ? 2 : 1; - - dbgs() << "Clusterify finished. Total clusters: " << Cases.size() - << ". Total compares: " << numCmps << '\n'; - }); + Clusters.resize(DstIndex); } void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, @@ -2696,96 +2093,6 @@ void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, BitTestCases[i].Parent = Last; } -void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { - MachineBasicBlock *SwitchMBB = FuncInfo.MBB; - - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = nullptr; - if (SwitchMBB + 1 != FuncInfo.MF->end()) - NextBlock = SwitchMBB + 1; - - - // Create a vector of Cases, sorted so that we can efficiently create a binary - // search tree from them. - CaseVector Cases; - Clusterify(Cases, SI); - - // Get the default destination MBB. - MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; - - if (isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()) && - !Cases.empty()) { - // Replace an unreachable default destination with the most popular case - // destination. - DenseMap<const BasicBlock *, unsigned> Popularity; - unsigned MaxPop = 0; - const BasicBlock *MaxBB = nullptr; - for (auto I : SI.cases()) { - const BasicBlock *BB = I.getCaseSuccessor(); - if (++Popularity[BB] > MaxPop) { - MaxPop = Popularity[BB]; - MaxBB = BB; - } - } - - // Set new default. - assert(MaxPop > 0); - assert(MaxBB); - Default = FuncInfo.MBBMap[MaxBB]; - - // Remove cases that were pointing to the destination that is now the default. - Cases.erase(std::remove_if(Cases.begin(), Cases.end(), - [&](const Case &C) { return C.BB == Default; }), - Cases.end()); - } - - // If there is only the default destination, go there directly. - if (Cases.empty()) { - // Update machine-CFG edges. - SwitchMBB->addSuccessor(Default); - - // If this is not a fall-through branch, emit the branch. - if (Default != NextBlock) { - DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, - getControlRoot(), DAG.getBasicBlock(Default))); - } - return; - } - - // Get the Value to be switched on. 
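Aside: the new sortAndRangeify, whose merge loop ends above, performs the same neighbor merging this removed Clusterify did. Modeled on plain data (Cluster and mergeAdjacent are hypothetical simplifications of CaseClusterVector):

    #include <vector>

    struct Cluster { long Low, High; int Succ; unsigned Weight; };

    // Clusters arrive sorted by Low; fold one into its predecessor when
    // it is numerically adjacent and branches to the same successor.
    void mergeAdjacent(std::vector<Cluster> &Clusters) {
      unsigned Dst = 0;
      for (unsigned Src = 0; Src < Clusters.size(); ++Src) {
        Cluster &CC = Clusters[Src];
        if (Dst != 0 && Clusters[Dst - 1].Succ == CC.Succ &&
            Clusters[Dst - 1].High + 1 == CC.Low) {
          Clusters[Dst - 1].High = CC.High;       // widen the range
          Clusters[Dst - 1].Weight += CC.Weight;  // accumulate weight
        } else {
          Clusters[Dst++] = CC;
        }
      }
      Clusters.resize(Dst);
    }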
- const Value *SV = SI.getCondition(); - - // Push the initial CaseRec onto the worklist - CaseRecVector WorkList; - WorkList.push_back(CaseRec(SwitchMBB,nullptr,nullptr, - CaseRange(Cases.begin(),Cases.end()))); - - while (!WorkList.empty()) { - // Grab a record representing a case range to process off the worklist - CaseRec CR = WorkList.back(); - WorkList.pop_back(); - - if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) - continue; - - // If the range has few cases (two or less) emit a series of specific - // tests. - if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) - continue; - - // If the switch has more than N blocks, and is at least 40% dense, and the - // target supports indirect branches, then emit a jump table rather than - // lowering the switch to a binary tree of conditional branches. - // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries(). - if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) - continue; - - // Emit binary tree. We need to pick a pivot, and push left and right ranges - // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. - handleBTSplitSwitchCase(CR, WorkList, SV, SwitchMBB); - } -} - void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; @@ -2950,19 +2257,47 @@ void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector<SDValue, 4> Values(NumValues); SDValue Cond = getValue(I.getOperand(0)); - SDValue TrueVal = getValue(I.getOperand(1)); - SDValue FalseVal = getValue(I.getOperand(2)); + SDValue LHSVal = getValue(I.getOperand(1)); + SDValue RHSVal = getValue(I.getOperand(2)); + auto BaseOps = {Cond}; ISD::NodeType OpCode = Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; - for (unsigned i = 0; i != NumValues; ++i) + // Min/max matching is only viable if all output VTs are the same. 
+ if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { + Value *LHS, *RHS; + SelectPatternFlavor SPF = matchSelectPattern(const_cast<User*>(&I), LHS, RHS); + ISD::NodeType Opc = ISD::DELETED_NODE; + switch (SPF) { + case SPF_UMAX: Opc = ISD::UMAX; break; + case SPF_UMIN: Opc = ISD::UMIN; break; + case SPF_SMAX: Opc = ISD::SMAX; break; + case SPF_SMIN: Opc = ISD::SMIN; break; + default: break; + } + + EVT VT = ValueVTs[0]; + LLVMContext &Ctx = *DAG.getContext(); + auto &TLI = DAG.getTargetLoweringInfo(); + while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector) + VT = TLI.getTypeToTransformTo(Ctx, VT); + + if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT)) { + OpCode = Opc; + LHSVal = getValue(LHS); + RHSVal = getValue(RHS); + BaseOps = {}; + } + } + + for (unsigned i = 0; i != NumValues; ++i) { + SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end()); + Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); + Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); Values[i] = DAG.getNode(OpCode, getCurSDLoc(), - TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), - Cond, - SDValue(TrueVal.getNode(), - TrueVal.getResNo() + i), - SDValue(FalseVal.getNode(), - FalseVal.getResNo() + i)); + LHSVal.getNode()->getValueType(LHSVal.getResNo()+i), + Ops); + } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ValueVTs), Values)); @@ -2994,10 +2329,11 @@ void SelectionDAGBuilder::visitSExt(const User &I) { void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); + SDLoc dl = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N, - DAG.getTargetConstant(0, TLI.getPointerTy()))); + setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, + DAG.getTargetConstant(0, dl, TLI.getPointerTy()))); } void SelectionDAGBuilder::visitFPExt(const User &I) { @@ -3053,19 +2389,20 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); + SDLoc dl = getCurSDLoc(); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. if (DestVT != N.getValueType()) - setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), + setValue(&I, DAG.getNode(ISD::BITCAST, dl, DestVT, N)); // convert types. // Check if the original LLVM IR Operand was a ConstantInt, because getValue() // might fold any kind of constant expression to an integer constant and that // is not what we are looking for. Only recognize a bitcast of a genuine // constant integer as an opaque constant. else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) - setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false, + setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false, /*isOpaque*/true)); else setValue(&I, N); // noop cast. @@ -3243,10 +2580,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Input == 0 ?
Src1 : Src2; if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); - else + else { + SDLoc dl = getCurSDLoc(); Src = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, Src, - DAG.getConstant(StartIdx[Input], TLI.getVectorIdxTy())); + ISD::EXTRACT_SUBVECTOR, dl, VT, Src, + DAG.getConstant(StartIdx[Input], dl, TLI.getVectorIdxTy())); + } } // Calculate new mask. @@ -3273,6 +2612,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // to insert and build vector. EVT EltVT = VT.getVectorElementType(); EVT IdxVT = TLI.getVectorIdxTy(); + SDLoc dl = getCurSDLoc(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; @@ -3284,14 +2624,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), - EltVT, Src, DAG.getConstant(Idx, IdxVT)); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + EltVT, Src, DAG.getConstant(Idx, dl, IdxVT)); } Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops)); + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { @@ -3383,6 +2723,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Type *Ty = Op0->getType()->getScalarType(); unsigned AS = Ty->getPointerAddressSpace(); SDValue N = getValue(Op0); + SDLoc dl = getCurSDLoc(); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { @@ -3392,8 +2733,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (Field) { // N = N + Offset uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); - N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, - DAG.getConstant(Offset, N.getValueType())); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, + DAG.getConstant(Offset, dl, N.getValueType())); } Ty = StTy->getElementType(Field); @@ -3408,8 +2749,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); - SDValue OffsVal = DAG.getConstant(Offs, PtrTy); - N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal); + SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal); continue; } @@ -3418,24 +2759,24 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // If the index is smaller or larger than intptr_t, truncate or extend // it. - IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType()); + IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. 
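Aside: the GEP index scaling the hunks above implement, as one scalar function (assumes 64-bit pointers; scaleIndex is a hypothetical name, and __builtin_ctzll is the GCC/Clang count-trailing-zeros builtin standing in for logBase2):

    #include <cstdint>

    // Advance pointer N by Idx elements of ElementSize bytes, using a
    // shift when the element size is a power of two, as the builder does.
    uint64_t scaleIndex(uint64_t N, int64_t Idx, uint64_t ElementSize) {
      if (ElementSize == 1)
        return N + uint64_t(Idx);                        // no scaling
      if ((ElementSize & (ElementSize - 1)) == 0)        // power of two
        return N + (uint64_t(Idx) << __builtin_ctzll(ElementSize));
      return N + uint64_t(Idx) * ElementSize;            // ISD::MUL path
    }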
if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { unsigned Amt = ElementSize.logBase2(); - IdxN = DAG.getNode(ISD::SHL, getCurSDLoc(), + IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN, - DAG.getConstant(Amt, IdxN.getValueType())); + DAG.getConstant(Amt, dl, IdxN.getValueType())); } else { - SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType()); - IdxN = DAG.getNode(ISD::MUL, getCurSDLoc(), + SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType()); + IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale); } } - N = DAG.getNode(ISD::ADD, getCurSDLoc(), + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN); } } @@ -3449,6 +2790,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { if (FuncInfo.StaticAllocaMap.count(&I)) return; // getValue will auto-populate this. + SDLoc dl = getCurSDLoc(); Type *Ty = I.getAllocatedType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); @@ -3460,11 +2802,11 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { EVT IntPtr = TLI.getPointerTy(); if (AllocSize.getValueType() != IntPtr) - AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); + AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); - AllocSize = DAG.getNode(ISD::MUL, getCurSDLoc(), IntPtr, + AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize, - DAG.getConstant(TySize, IntPtr)); + DAG.getConstant(TySize, dl, IntPtr)); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to @@ -3476,18 +2818,19 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Round the size of the allocation up to the stack alignment size // by add SA-1 to the size. - AllocSize = DAG.getNode(ISD::ADD, getCurSDLoc(), + AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign-1)); + DAG.getIntPtrConstant(StackAlign - 1, dl)); // Mask out the low bits for alignment purposes. - AllocSize = DAG.getNode(ISD::AND, getCurSDLoc(), + AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); + DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1), + dl)); - SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; + SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) }; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); - SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops); + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops); setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); @@ -3535,8 +2878,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = DAG.getRoot(); } + SDLoc dl = getCurSDLoc(); + if (isVolatile) - Root = TLI.prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); + Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), @@ -3552,15 +2897,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // (MaxParallelChains should always remain as failsafe). 
if (ChainI == MaxParallelChains) { assert(PendingLoads.empty() && "PendingLoads must be serialized first"); - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } - SDValue A = DAG.getNode(ISD::ADD, getCurSDLoc(), + SDValue A = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root, + DAG.getConstant(Offsets[i], dl, PtrVT)); + SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, isNonTemporal, isInvariant, Alignment, AAInfo, Ranges); @@ -3570,7 +2915,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } if (!ConstantMemory) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); if (isVolatile) DAG.setRoot(Chain); @@ -3578,7 +2923,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { PendingLoads.push_back(Chain); } - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values)); } @@ -3610,6 +2955,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; unsigned Alignment = I.getAlignment(); + SDLoc dl = getCurSDLoc(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -3618,21 +2964,21 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. if (ChainI == MaxParallelChains) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } - SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue St = DAG.getStore(Root, getCurSDLoc(), + SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, + DAG.getConstant(Offsets[i], dl, PtrVT)); + SDValue St = DAG.getStore(Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), isVolatile, isNonTemporal, Alignment, AAInfo); Chains[ChainI] = St; } - SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); DAG.setRoot(StoreNode); } @@ -3664,6 +3010,94 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { setValue(&I, StoreNode); } +// Gather/scatter receive a vector of pointers. 
+// This vector of pointers may be represented as a base pointer + vector of +// indices; it depends on the GEP and the instruction preceding the GEP +// that calculates the indices +static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index, + SelectionDAGBuilder* SDB) { + + assert (Ptr->getType()->isVectorTy() && "Unexpected pointer type"); + GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); + if (!Gep || Gep->getNumOperands() > 2) + return false; + ShuffleVectorInst *ShuffleInst = + dyn_cast<ShuffleVectorInst>(Gep->getPointerOperand()); + if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() || + cast<Instruction>(ShuffleInst->getOperand(0))->getOpcode() != + Instruction::InsertElement) + return false; + + Ptr = cast<InsertElementInst>(ShuffleInst->getOperand(0))->getOperand(1); + + SelectionDAG& DAG = SDB->DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // Check if the Ptr is inside the current basic block + // If not, look for the shuffle instruction + if (SDB->findValue(Ptr)) + Base = SDB->getValue(Ptr); + else if (SDB->findValue(ShuffleInst)) { + SDValue ShuffleNode = SDB->getValue(ShuffleInst); + SDLoc sdl = ShuffleNode; + Base = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, + ShuffleNode.getValueType().getScalarType(), ShuffleNode, + DAG.getConstant(0, sdl, TLI.getVectorIdxTy())); + SDB->setValue(Ptr, Base); + } + else + return false; + + Value *IndexVal = Gep->getOperand(1); + if (SDB->findValue(IndexVal)) { + Index = SDB->getValue(IndexVal); + + if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) { + IndexVal = Sext->getOperand(0); + if (SDB->findValue(IndexVal)) + Index = SDB->getValue(IndexVal); + } + return true; + } + return false; +} + +void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask) + Value *Ptr = I.getArgOperand(1); + SDValue Src0 = getValue(I.getArgOperand(0)); + SDValue Mask = getValue(I.getArgOperand(3)); + EVT VT = Src0.getValueType(); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + SDValue Base; + SDValue Index; + Value *BasePtr = Ptr; + bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + + Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; + MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + if (!UniformBase) { + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Index = getValue(Ptr); + } + SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; + SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, + Ops, MMO); + DAG.setRoot(Scatter); + setValue(&I, Scatter); +} + void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { SDLoc sdl = getCurSDLoc(); @@ -3705,6 +3139,59 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { setValue(&I, Load); } +void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) + Value *Ptr = I.getArgOperand(0); + SDValue Src0 = getValue(I.getArgOperand(3)); + SDValue Mask = getValue(I.getArgOperand(2)); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SDValue Root = DAG.getRoot(); + SDValue Base; + SDValue Index; + Value *BasePtr = Ptr; + bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + bool ConstantMemory = false; + if (UniformBase && AA->pointsToConstantMemory( + AliasAnalysis::Location(BasePtr, + AA->getTypeStoreSize(I.getType()), + AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + Root = DAG.getEntryNode(); + ConstantMemory = true; + } + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + if (!UniformBase) { + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Index = getValue(Ptr); + } + SDValue Ops[] = { Root, Src0, Mask, Base, Index }; + SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, + Ops, MMO); + + SDValue OutChain = Gather.getValue(1); + if (!ConstantMemory) + PendingLoads.push_back(OutChain); + setValue(&I, Gather); +} + void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); @@ -3769,8 +3256,8 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); - Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); + Ops[1] = DAG.getConstant(I.getOrdering(), dl, TLI.getPointerTy()); + Ops[2] = DAG.getConstant(I.getSynchScope(), dl, TLI.getPointerTy()); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -3859,7 +3346,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), + TLI.getPointerTy())); // Add all operands of the call to the operand list. 
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { @@ -3919,9 +3407,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, - DAG.getConstant(0x007fffff, MVT::i32)); + DAG.getConstant(0x007fffff, dl, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, - DAG.getConstant(0x3f800000, MVT::i32)); + DAG.getConstant(0x3f800000, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); } @@ -3934,21 +3422,108 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, SDLoc dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, - DAG.getConstant(0x7f800000, MVT::i32)); + DAG.getConstant(0x7f800000, dl, MVT::i32)); SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, - DAG.getConstant(23, TLI.getPointerTy())); + DAG.getConstant(23, dl, TLI.getPointerTy())); SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, - DAG.getConstant(127, MVT::i32)); + DAG.getConstant(127, dl, MVT::i32)); return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); } /// getF32Constant - Get 32-bit floating point constant. static SDValue -getF32Constant(SelectionDAG &DAG, unsigned Flt) { - return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), +getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) { + return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl, MVT::f32); } +static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, + SelectionDAG &DAG) { + // IntegerPartOfX = (int32_t)t0; + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = t0 - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode( + ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy())); + + SDValue TwoToFractionalPartOfX; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304, dl)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e, dl)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3, dl)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd, dl)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + //
TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e, dl)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d, dl)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14, dl)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234, dl)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000, dl)); + } + + // Add the exponent into the result in integer domain. + SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX)); +} + /// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, @@ -3960,92 +3535,10 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, // final result: // // #define LOG2OFe 1.4426950f - // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); + // t0 = Op * LOG2OFe SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, - getF32Constant(DAG, 0x3fb8aa3b)); - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); - - // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX; - SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); - - // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, TLI.getPointerTy())); - - SDValue TwoToFracPartOfX; - if (LimitFloatPrecision <= 6) { - // For floating-point precision of 6: - // - // TwoToFractionalPartOfX = - // 0.997535578f + - // (0.735607626f + 0.252464424f * x) * x; - // - // error 0.0144103317, which is 6 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - } else if (LimitFloatPrecision <= 12) { - // For floating-point precision of 12: - // - // TwoToFractionalPartOfX = - // 0.999892986f + - // (0.696457318f + - // (0.224338339f + 0.792043434e-1f * x) * x) * x; - // - // 0.000107046256 error, which is 13 to 14 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - 
getF32Constant(DAG, 0x3f324b07)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - } else { // LimitFloatPrecision <= 18 - // For floating-point precision of 18: - // - // TwoToFractionalPartOfX = - // 0.999999982f + - // (0.693148872f + - // (0.240227044f + - // (0.554906021e-1f + - // (0.961591928e-2f + - // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; - // - // error 2.47208000*10^(-7), which is better than 18 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - } - - // Add the exponent into the result in integer domain. - SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::ADD, dl, MVT::i32, - t13, IntegerPartOfX)); + getF32Constant(DAG, 0x3fb8aa3b, dl)); + return getLimitedPrecisionExp2(t0, dl, DAG); } // No special expansion. @@ -4063,7 +3556,7 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // Scale the exponent by log(2) [0.69314718f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3f317218)); + getF32Constant(DAG, 0x3f317218, dl)); // Get the significand and build it into a floating-point number with // exponent of 1. 
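To make the refactor above concrete: the new getLimitedPrecisionExp2 helper computes 2^t0 by splitting t0 into integer and fractional parts, approximating 2^f with a short minimax polynomial, and then adding the integer part straight into the exponent bits of the result. A minimal scalar sketch of that math (not LLVM code; the constant names and coefficients are taken from the comments in the diff, everything else is illustrative):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

static float limited_precision_exp(float x) {
  const float LOG2OFE = 1.4426950f;  // 0x3fb8aa3b: log2(e), so exp(x) = 2^t0
  float t0 = x * LOG2OFE;
  int32_t n = (int32_t)t0;           // IntegerPartOfX   (FP_TO_SINT)
  float f = t0 - (float)n;           // FractionalPartOfX, in (-1, 1)

  // 2^f ~= 0.997535578f + (0.735607626f + 0.252464424f * f) * f
  // (the 6-bit-accuracy branch; the 12- and 18-bit branches use longer
  // polynomials of the same shape).
  float two_to_f = 0.997535578f + (0.735607626f + 0.252464424f * f) * f;

  // "Add the exponent into the result in integer domain": bits += n << 23
  // scales by 2^n directly in the IEEE-754 exponent field.
  uint32_t bits;
  std::memcpy(&bits, &two_to_f, sizeof bits);
  bits += (uint32_t)n << 23;
  float result;
  std::memcpy(&result, &bits, sizeof result);
  return result;
}

int main() {
  for (float x : {-2.0f, 0.0f, 1.0f, 3.5f})
    std::printf("exp(%g) ~= %g\n", x, limited_precision_exp(x));
}
```

With the helper factored out, expandExp and (further down) expandPow reduce to a single multiply by log2(e) or log2(10) followed by this sequence, which is exactly what the shortened bodies in the diff now do.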
@@ -4079,12 +3572,12 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0034276066, which is better than 8 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbe74c456)); + getF32Constant(DAG, 0xbe74c456, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3fb3a2b1)); + getF32Constant(DAG, 0x3fb3a2b1, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f949a29)); + getF32Constant(DAG, 0x3f949a29, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -4096,18 +3589,18 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.000061011436, which is 14 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbd67b6d6)); + getF32Constant(DAG, 0xbd67b6d6, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3ee4f4b8)); + getF32Constant(DAG, 0x3ee4f4b8, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fbc278b)); + getF32Constant(DAG, 0x3fbc278b, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40348e95)); + getF32Constant(DAG, 0x40348e95, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3fdef31a)); + getF32Constant(DAG, 0x3fdef31a, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -4121,24 +3614,24 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000023660568, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbc91e5ac)); + getF32Constant(DAG, 0xbc91e5ac, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e4350aa)); + getF32Constant(DAG, 0x3e4350aa, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f60d3e3)); + getF32Constant(DAG, 0x3f60d3e3, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x4011cdf0)); + getF32Constant(DAG, 0x4011cdf0, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x406cfd1c)); + getF32Constant(DAG, 0x406cfd1c, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x408797cb)); + getF32Constant(DAG, 0x408797cb, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4006dcab)); + getF32Constant(DAG, 0x4006dcab, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); @@ -4173,12 +3666,12 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0049451742, which is more than 7 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbeb08fe0)); + getF32Constant(DAG, 0xbeb08fe0, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x40019463)); + getF32Constant(DAG, 0x40019463, dl)); 
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fd6633d)); + getF32Constant(DAG, 0x3fd6633d, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -4190,18 +3683,18 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000876136000, which is better than 13 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbda7262e)); + getF32Constant(DAG, 0xbda7262e, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3f25280b)); + getF32Constant(DAG, 0x3f25280b, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x4007b923)); + getF32Constant(DAG, 0x4007b923, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40823e2f)); + getF32Constant(DAG, 0x40823e2f, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x4020d29c)); + getF32Constant(DAG, 0x4020d29c, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -4216,24 +3709,24 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000018516, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbcd2769e)); + getF32Constant(DAG, 0xbcd2769e, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e8ce0b9)); + getF32Constant(DAG, 0x3e8ce0b9, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fa22ae7)); + getF32Constant(DAG, 0x3fa22ae7, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40525723)); + getF32Constant(DAG, 0x40525723, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x40aaf200)); + getF32Constant(DAG, 0x40aaf200, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x40c39dad)); + getF32Constant(DAG, 0x40c39dad, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4042902c)); + getF32Constant(DAG, 0x4042902c, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); @@ -4254,7 +3747,7 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // Scale the exponent by log10(2) [0.30102999f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3e9a209a)); + getF32Constant(DAG, 0x3e9a209a, dl)); // Get the significand and build it into a floating-point number with // exponent of 1. 
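The log, log2, and log10 expansions in these hunks all rest on the GetSignificand/GetExponent pair shown at the top of this section: they split a float into m * 2^e so that only log(m) on [1, 2) needs a polynomial, while the exponent contributes a simple scaled add. A small sketch of that decomposition (plain C++ with a hypothetical helper name):

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Rewrite a positive float x as x = m * 2^e, with m in [1, 2), using only
// integer bit operations on the IEEE-754 word.
static void split_float(float x, float &m, int32_t &e) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  // GetExponent: (bits & 0x7f800000) >> 23, then remove the bias of 127.
  e = (int32_t)((bits & 0x7f800000u) >> 23) - 127;
  // GetSignificand: keep the 23 mantissa bits and OR in 0x3f800000 (1.0f),
  // forcing an unbiased exponent of zero.
  uint32_t mbits = (bits & 0x007fffffu) | 0x3f800000u;
  std::memcpy(&m, &mbits, sizeof m);
}

int main() {
  float m;
  int32_t e;
  split_float(10.0f, m, e);
  // log2(x) = e + log2(m); ln and log10 just scale e by ln(2) or log10(2),
  // exactly as the LogOfExponent computations in the diff do.
  std::printf("10 = %g * 2^%d; log2(10) ~= %g\n", m, e, e + std::log2(m));
}
```

The per-precision branches then differ only in which polynomial replaces std::log2(m) here.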
@@ -4270,12 +3763,12 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0014886165, which is 6 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbdd49a13)); + getF32Constant(DAG, 0xbdd49a13, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3f1c0789)); + getF32Constant(DAG, 0x3f1c0789, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f011300)); + getF32Constant(DAG, 0x3f011300, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -4286,15 +3779,15 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.00019228036, which is better than 12 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3d431f31)); + getF32Constant(DAG, 0x3d431f31, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3ea21fb2)); + getF32Constant(DAG, 0x3ea21fb2, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f6ae232)); + getF32Constant(DAG, 0x3f6ae232, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f25f7c3)); + getF32Constant(DAG, 0x3f25f7c3, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -4307,21 +3800,21 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000037995730, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3c5d51ce)); + getF32Constant(DAG, 0x3c5d51ce, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e00685a)); + getF32Constant(DAG, 0x3e00685a, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3efb6798)); + getF32Constant(DAG, 0x3efb6798, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f88d192)); + getF32Constant(DAG, 0x3f88d192, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3fc4316c)); + getF32Constant(DAG, 0x3fc4316c, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3f57ce70)); + getF32Constant(DAG, 0x3f57ce70, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); @@ -4336,91 +3829,8 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && - LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); - - // FractionalPartOfX = x - (float)IntegerPartOfX; - SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); - - // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, TLI.getPointerTy())); - - SDValue TwoToFractionalPartOfX; - if (LimitFloatPrecision <= 6) { - // For 
floating-point precision of 6: - // - // TwoToFractionalPartOfX = - // 0.997535578f + - // (0.735607626f + 0.252464424f * x) * x; - // - // error 0.0144103317, which is 6 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - } else if (LimitFloatPrecision <= 12) { - // For floating-point precision of 12: - // - // TwoToFractionalPartOfX = - // 0.999892986f + - // (0.696457318f + - // (0.224338339f + 0.792043434e-1f * x) * x) * x; - // - // error 0.000107046256, which is 13 to 14 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - } else { // LimitFloatPrecision <= 18 - // For floating-point precision of 18: - // - // TwoToFractionalPartOfX = - // 0.999999982f + - // (0.693148872f + - // (0.240227044f + - // (0.554906021e-1f + - // (0.961591928e-2f + - // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; - // error 2.47208000*10^(-7), which is better than 18 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - } - - // Add the exponent into the result in integer domain. - SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, - TwoToFractionalPartOfX); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::ADD, dl, MVT::i32, - t13, IntegerPartOfX)); - } + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) + return getLimitedPrecisionExp2(Op, dl, DAG); // No special expansion. 
return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); @@ -4444,90 +3854,10 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, // final result: // // #define LOG2OF10 3.3219281f - // IntegerPartOfX = (int32_t)(x * LOG2OF10); + // t0 = Op * LOG2OF10; SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, - getF32Constant(DAG, 0x40549a78)); - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); - - // FractionalPartOfX = x - (float)IntegerPartOfX; - SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); - - // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, TLI.getPointerTy())); - - SDValue TwoToFractionalPartOfX; - if (LimitFloatPrecision <= 6) { - // For floating-point precision of 6: - // - // twoToFractionalPartOfX = - // 0.997535578f + - // (0.735607626f + 0.252464424f * x) * x; - // - // error 0.0144103317, which is 6 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - } else if (LimitFloatPrecision <= 12) { - // For floating-point precision of 12: - // - // TwoToFractionalPartOfX = - // 0.999892986f + - // (0.696457318f + - // (0.224338339f + 0.792043434e-1f * x) * x) * x; - // - // error 0.000107046256, which is 13 to 14 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - } else { // LimitFloatPrecision <= 18 - // For floating-point precision of 18: - // - // TwoToFractionalPartOfX = - // 0.999999982f + - // (0.693148872f + - // (0.240227044f + - // (0.554906021e-1f + - // (0.961591928e-2f + - // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; - // error 2.47208000*10^(-7), which is better than 18 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - } - - SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX); - return 
DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::ADD, dl, MVT::i32, - t13, IntegerPartOfX)); + getF32Constant(DAG, 0x40549a78, dl)); + return getLimitedPrecisionExp2(t0, dl, DAG); } // No special expansion. @@ -4549,14 +3879,13 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // powi(x, 0) -> 1.0 if (Val == 0) - return DAG.getConstantFP(1.0, LHS.getValueType()); + return DAG.getConstantFP(1.0, DL, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize) || + if (!F->hasFnAttribute(Attribute::OptimizeForSize) || // If optimizing for size, don't insert too many multiplies. This // inserts up to 5 multiplies. - CountPopulation_32(Val)+Log2_32(Val) < 7) { + countPopulation(Val) + Log2_32(Val) < 7) { // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, // powi(x,15) generates one more multiply than it should), but this has @@ -4579,7 +3908,7 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // If the original was negative, invert the result, producing 1/(x*x*x). if (RHSC->getSExtValue() < 0) Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), - DAG.getConstantFP(1.0, LHS.getValueType()), Res); + DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res); return Res; } } @@ -4609,11 +3938,9 @@ static unsigned getTruncatedArgReg(const SDValue &N) { /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. -bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, - MDNode *Variable, - MDNode *Expr, int64_t Offset, - bool IsIndirect, - const SDValue &N) { +bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( + const Value *V, DILocalVariable *Variable, DIExpression *Expr, + DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; @@ -4622,8 +3949,9 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); // Ignore inlined function arguments here. - DIVariable DV(Variable); - if (DV.isInlinedFnArgument(MF.getFunction())) + // + // FIXME: Should we be checking DL->inlinedAt() to determine this? 
+ if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) return false; Optional<MachineOperand> Op; @@ -4664,13 +3992,15 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, if (!Op) return false; + assert(Variable->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); if (Op->isReg()) FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE), - IsIndirect, Op->getReg(), Offset, Variable, Expr)); + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + Op->getReg(), Offset, Variable, Expr)); else FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addOperand(*Op) .addImm(Offset) .addMetadata(Variable) @@ -4715,16 +4045,20 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); + SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); EVT VT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); + Res = DAG.getNode(ISD::READ_REGISTER, sdl, + DAG.getVTList(VT, MVT::Other), Chain, RegName); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); return nullptr; } case Intrinsic::write_register: { Value *Reg = I.getArgOperand(0); Value *RegValue = I.getArgOperand(1); - SDValue Chain = getValue(RegValue).getOperand(0); + SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, @@ -4736,6 +4070,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { + // FIXME: this definition of "user defined address space" is x86-specific // Assert for address < 256 since we support only user defined address // spaces. assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() @@ -4750,12 +4085,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + false, isTC, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MC); return nullptr; } case Intrinsic::memset: { + // FIXME: this definition of "user defined address space" is x86-specific // Assert for address < 256 since we support only user defined address // spaces. assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() @@ -4768,11 +4107,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. 
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - MachinePointerInfo(I.getArgOperand(0)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0))); + updateDAGForMaybeTailCall(MS); return nullptr; } case Intrinsic::memmove: { + // FIXME: this definition of "user defined address space" is x86-specific // Assert for address < 256 since we support only user defined address // spaces. assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() @@ -4787,20 +4129,20 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MM); return nullptr; } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); - MDNode *Variable = DI.getVariable(); - MDNode *Expression = DI.getExpression(); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); const Value *Address = DI.getAddress(); - DIVariable DIVar(Variable); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (!Address || !DIVar) { + assert(Variable && "Missing variable"); + if (!Address) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return nullptr; } @@ -4821,9 +4163,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. - bool isParameter = - (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || - isa<Argument>(Address)); + bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable || + isa<Argument>(Address); const AllocaInst *AI = dyn_cast<AllocaInst>(Address); @@ -4836,7 +4177,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { else { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, + N); return nullptr; } } else if (AI) @@ -4853,7 +4195,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, N)) { // If variable is pinned by a alloca in dominating bb then // use StaticAllocaMap. 
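The new isTC flag threaded through the memcpy/memset/memmove cases feeds updateDAGForMaybeTailCall, defined near the end of this patch: the lowering helper hands back either a new root chain or a null node meaning the operation was folded into a tail call. A toy model of that contract, with hypothetical types standing in for the real SelectionDAG classes:

```cpp
#include <cstdio>

struct Node {};  // stand-in for an SDNode

// A non-null result becomes the new DAG root; a null result signals that the
// call was emitted as a tail call, so the builder must not also generate a
// separate return.
struct Builder {
  Node *Root = nullptr;
  bool HasTailCall = false;

  void updateForMaybeTailCall(Node *MaybeTC) {
    if (MaybeTC != nullptr)
      Root = MaybeTC;
    else
      HasTailCall = true;
  }
};

int main() {
  Builder B;
  Node MemcpyNode;
  B.updateForMaybeTailCall(&MemcpyNode);  // ordinary lowering
  B.updateForMaybeTailCall(nullptr);      // lowered as a tail call
  std::printf("HasTailCall=%d\n", B.HasTailCall);
}
```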
@@ -4876,14 +4218,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast<DbgValueInst>(I); - DIVariable DIVar(DI.getVariable()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgValueInst should be either null or a DIVariable."); - if (!DIVar) - return nullptr; + assert(DI.getVariable() && "Missing variable"); - MDNode *Variable = DI.getVariable(); - MDNode *Expression = DI.getExpression(); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) @@ -4904,7 +4242,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (N.getNode()) { // A dbg.value for an alloca is always indirect. bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; - if (!EmitFuncArgumentDbgValue(V, Variable, Expression, Offset, + if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, IsIndirect, N)) { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), IsIndirect, Offset, dl, SDNodeOrder); @@ -4943,7 +4281,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Find the type id for the given typeinfo. GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); - Res = DAG.getConstant(TypeID, MVT::i32); + Res = DAG.getConstant(TypeID, sdl, MVT::i32); setValue(&I, Res); return nullptr; } @@ -4969,7 +4307,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { CfaArg.getValueType()), CfaArg); SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), - DAG.getConstant(0, TLI.getPointerTy())); + DAG.getConstant(0, sdl, TLI.getPointerTy())); setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); return nullptr; @@ -5008,9 +4346,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + case Intrinsic::masked_gather: + visitMaskedGather(I); + return nullptr; case Intrinsic::masked_load: visitMaskedLoad(I); return nullptr; + case Intrinsic::masked_scatter: + visitMaskedScatter(I); + return nullptr; case Intrinsic::masked_store: visitMaskedStore(I); return nullptr; @@ -5063,44 +4407,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // We must do this early because v2i32 is not a legal type. 
SDValue ShOps[2]; ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, MVT::i32); + ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); EVT DestVT = TLI.getValueType(I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, - DAG.getConstant(NewIntrinsic, MVT::i32), + DAG.getConstant(NewIntrinsic, sdl, MVT::i32), getValue(I.getArgOperand(0)), ShAmt); setValue(&I, Res); return nullptr; } - case Intrinsic::x86_avx_vinsertf128_pd_256: - case Intrinsic::x86_avx_vinsertf128_ps_256: - case Intrinsic::x86_avx_vinsertf128_si_256: - case Intrinsic::x86_avx2_vinserti128: { - EVT DestVT = TLI.getValueType(I.getType()); - EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); - uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * - ElVT.getVectorNumElements(); - Res = - DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, - getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - DAG.getConstant(Idx, TLI.getVectorIdxTy())); - setValue(&I, Res); - return nullptr; - } - case Intrinsic::x86_avx_vextractf128_pd_256: - case Intrinsic::x86_avx_vextractf128_ps_256: - case Intrinsic::x86_avx_vextractf128_si_256: - case Intrinsic::x86_avx2_vextracti128: { - EVT DestVT = TLI.getValueType(I.getType()); - uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * - DestVT.getVectorNumElements(); - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT, - getValue(I.getArgOperand(0)), - DAG.getConstant(Idx, TLI.getVectorIdxTy())); - setValue(&I, Res); - return nullptr; - } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -5238,7 +4554,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, getValue(I.getArgOperand(0)), - DAG.getTargetConstant(0, MVT::i32)))); + DAG.getTargetConstant(0, sdl, + MVT::i32)))); return nullptr; case Intrinsic::convert_from_fp16: setValue(&I, @@ -5366,9 +4683,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { EVT Ty = Arg.getValueType(); if (CI->isZero()) - Res = DAG.getConstant(-1ULL, Ty); + Res = DAG.getConstant(-1ULL, sdl, Ty); else - Res = DAG.getConstant(0, Ty); + Res = DAG.getConstant(0, sdl, Ty); setValue(&I, Res); return nullptr; @@ -5498,7 +4815,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; SmallVector<Value *, 4> Allocas; - GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL); + GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL); for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { @@ -5547,6 +4864,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); + case Intrinsic::eh_actions: + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + return nullptr; case Intrinsic::donothing: // ignore return nullptr; @@ -5565,7 +4885,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::experimental_gc_result_int: case Intrinsic::experimental_gc_result_float: - case Intrinsic::experimental_gc_result_ptr: { + case Intrinsic::experimental_gc_result_ptr: + case Intrinsic::experimental_gc_result: { visitGCResult(I); return nullptr; } @@ -5576,45 +4897,49 @@ 
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); - case Intrinsic::frameallocate: { + case Intrinsic::frameescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - // Do the allocation and map it as a normal value. - // FIXME: Maybe we should add this to the alloca map so that we don't have - // to register allocate it? - uint64_t Size = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(); - int Alloc = MF.getFrameInfo()->CreateFrameAllocation(Size); - MVT PtrVT = TLI.getPointerTy(0); - SDValue FIVal = DAG.getFrameIndex(Alloc, PtrVT); - setValue(&I, FIVal); - - // Directly emit a FRAME_ALLOC machine instr. Label assignment emission is - // the same on all targets. - MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName()); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, - TII->get(TargetOpcode::FRAME_ALLOC)) - .addSym(FrameAllocSym) - .addFrameIndex(Alloc); + // Directly emit some FRAME_ALLOC machine instrs. Label assignment emission + // is the same on all targets. + for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { + Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); + if (isa<ConstantPointerNull>(Arg)) + continue; // Skip null pointers. They represent a hole in index space. + AllocaInst *Slot = cast<AllocaInst>(Arg); + assert(FuncInfo.StaticAllocaMap.count(Slot) && + "can only escape static allocas"); + int FI = FuncInfo.StaticAllocaMap[Slot]; + MCSymbol *FrameAllocSym = + MF.getMMI().getContext().getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(MF.getName()), Idx); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, + TII->get(TargetOpcode::FRAME_ALLOC)) + .addSym(FrameAllocSym) + .addFrameIndex(FI); + } return nullptr; } case Intrinsic::framerecover: { - // i8* @llvm.framerecover(i8* %fn, i8* %fp) + // i8* @llvm.framerecover(i8* %fn, i8* %fp, i32 %idx) MachineFunction &MF = DAG.getMachineFunction(); MVT PtrVT = TLI.getPointerTy(0); // Get the symbol that defines the frame offset. - Function *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); + auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); + auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); + unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName()); + MF.getMMI().getContext().getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); // Create a TargetExternalSymbol for the label to avoid any target lowering // that would make this PC relative. 
StringRef Name = FrameAllocSym->getName(); - assert(Name.size() == strlen(Name.data()) && "not null terminated"); + assert(Name.data()[Name.size()] == '\0' && "not null terminated"); SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT); SDValue OffsetVal = DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym); @@ -5627,6 +4952,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + case Intrinsic::eh_begincatch: + case Intrinsic::eh_endcatch: + llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); + case Intrinsic::eh_exceptioncode: { + unsigned Reg = TLI.getExceptionPointerRegister(); + assert(Reg && "cannot get exception code on this platform"); + MVT PtrVT = TLI.getPointerTy(); + const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); + unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC); + SDValue N = + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); + setValue(&I, N); + return nullptr; + } } } @@ -5639,7 +4979,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, if (LandingPad) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. - BeginLabel = MMI.getContext().CreateTempSymbol(); + BeginLabel = MMI.getContext().createTempSymbol(); // For SjLj, keep track of which landing pads go with which invokes // so as to maintain the ordering of pads in the LSDA. @@ -5659,9 +4999,8 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, CLI.setChain(getRoot()); } - - const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); - std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); assert((CLI.IsTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); @@ -5683,7 +5022,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, if (LandingPad) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. - MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); + MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. 
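A few hunks above, frameallocate becomes the indexed frameescape/framerecover pair: the parent function publishes one FRAME_ALLOC label per escaped static alloca (null arguments are skipped as holes in the index space), and framerecover adds the offset named by the label for a given index to the parent frame pointer it was handed. A rough model in plain C++, with offsetof standing in for the emitted labels (the real offsets are resolved through the per-index symbols, which this diff names roughly "<function>$frame_escape_<idx>"):

```cpp
#include <cstddef>
#include <cstdio>

// Rough model, not the real mechanism: each escaped alloca of the parent has
// a fixed offset from the parent frame, and recovery is just fp + offset.
struct ParentFrame {
  int a = 1;     // escaped slot, index 0
  double b = 2;  // escaped slot, index 1
};

static void *framerecover(char *parent_fp, unsigned idx) {
  static const size_t offsets[] = {offsetof(ParentFrame, a),
                                   offsetof(ParentFrame, b)};
  return parent_fp + offsets[idx];
}

int main() {
  ParentFrame p;
  int *a = static_cast<int *>(framerecover(reinterpret_cast<char *>(&p), 0));
  std::printf("recovered a = %d\n", *a);
}
```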
@@ -5766,9 +5105,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), PointerType::getUnqual(LoadTy)); - if (const Constant *LoadCst = - ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), - Builder.DL)) + if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr( + const_cast<Constant *>(LoadInput), *Builder.DL)) return Builder.getValue(LoadCst); } @@ -5829,7 +5167,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); if (CSize && CSize->getZExtValue() == 0) { EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); - setValue(&I, DAG.getConstant(0, CallVT)); + setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT)); return true; } @@ -6112,7 +5450,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } } - if (unsigned IID = F->getIntrinsicID()) { + if (Intrinsic::ID IID = F->getIntrinsicID()) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) return; @@ -6364,9 +5702,10 @@ static void GetRegistersForValue(SelectionDAG &DAG, // If this is a constraint for a single physreg, or a constraint for a // register class, find it. - std::pair<unsigned, const TargetRegisterClass*> PhysReg = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> PhysReg = + TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(), + OpInfo.ConstraintCode, + OpInfo.ConstraintVT); unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) { @@ -6462,8 +5801,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfoVector ConstraintOperands; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI.ParseConstraints(CS); + TargetLowering::AsmOperandInfoVector TargetConstraints = + TLI.ParseConstraints(DAG.getSubtarget().getRegisterInfo(), CS); bool hasMemory = false; @@ -6555,12 +5894,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - std::pair<unsigned, const TargetRegisterClass*> MatchRC = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - std::pair<unsigned, const TargetRegisterClass*> InputRC = - TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, - Input.ConstraintVT); + const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + std::pair<unsigned, const TargetRegisterClass *> MatchRC = + TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> InputRC = + TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { @@ -6687,7 +6027,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } } - AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, + AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, getCurSDLoc(), TLI.getPointerTy())); // Loop over all of the inputs, copying the operand values into the @@ -6707,10 +6047,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Memory output, or 'other' output (e.g. 'X' constraint). 
assert(OpInfo.isIndirect && "Memory output must be indirect operand"); + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + // Add information to the INLINEASM node to know about this output. unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, - TLI.getPointerTy())); + OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(), + MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } @@ -6745,7 +6091,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { .AddInlineAsmOperands(OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber : InlineAsm::Kind_RegDef, - false, 0, DAG, AsmNodeOperands); + false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; } case InlineAsm::isInput: { @@ -6800,11 +6146,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { return; } } + SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), + MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, - true, OpInfo.getMatchedOperand(), + true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); break; } @@ -6814,9 +6161,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Unexpected number of operands"); // Add information to the INLINEASM node to know about this input. // See InlineAsm.h isUseOperandTiedToDef. + OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, getCurSDLoc(), TLI.getPointerTy())); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; @@ -6843,6 +6191,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + getCurSDLoc(), TLI.getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; @@ -6853,10 +6202,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { assert(InOperandVal.getValueType() == TLI.getPointerTy() && "Memory operands expect pointer values"); + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + // Add information to the INLINEASM node to know about this input. 
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI.getPointerTy())); + getCurSDLoc(), + MVT::i32)); AsmNodeOperands.push_back(InOperandVal); break; } @@ -6884,11 +6240,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { return; } - OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), + SDLoc dl = getCurSDLoc(); + + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, CS.getInstruction()); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, - DAG, AsmNodeOperands); + dl, DAG, AsmNodeOperands); break; } case InlineAsm::isClobber: { @@ -6896,7 +6254,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, - false, 0, DAG, + false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; } @@ -7020,7 +6378,7 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, - bool UseVoidTy, + Type *ReturnTy, MachineBasicBlock *LandingPad, bool IsPatchPoint) { TargetLowering::ArgListTy Args; @@ -7041,10 +6399,9 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, Args.push_back(Entry); } - Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) + .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs) .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); return lowerInvokable(CLI, LandingPad); @@ -7068,15 +6425,15 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, - SmallVectorImpl<SDValue> &Ops, + SDLoc DL, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { SDValue OpVal = Builder.getValue(CS.getArgument(i)); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { Ops.push_back( - Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); Ops.push_back( - Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64)); } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); Ops.push_back( @@ -7098,7 +6455,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { SDLoc DL = getCurSDLoc(); Callee = getValue(CI.getCalledValue()); - NullPtr = DAG.getIntPtrConstant(0, true); + NullPtr = DAG.getIntPtrConstant(0, DL, true); // The stackmap intrinsic only records the live variables (the arguemnts // passed to it) and emits NOPS (if requested). Unlike the patchpoint @@ -7116,13 +6473,14 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // Add the <id> and <numBytes> constants. 
SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64)); SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL, + MVT::i32)); // Push live variables for the stack map. - addStackMapLiveVars(&CI, 2, Ops, *this); + addStackMapLiveVars(&CI, 2, DL, Ops, *this); // We are not pushing any register mask info here on the operands list, // because the stackmap doesn't clobber anything. @@ -7161,7 +6519,17 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, CallingConv::ID CC = CS.getCallingConv(); bool IsAnyRegCC = CC == CallingConv::AnyReg; bool HasDef = !CS->getType()->isVoidTy(); - SDValue Callee = getValue(CS->getOperand(2)); // <target> + SDLoc dl = getCurSDLoc(); + SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos)); + + // Handle immediate and symbolic callees. + if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee)) + Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl, + /*isTarget=*/true); + else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(), + SDLoc(SymbolicCallee), + SymbolicCallee->getValueType(0)); // Get the real number of arguments participating in the call <numArgs> SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); @@ -7175,8 +6543,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; + Type *ReturnTy = + IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); std::pair<SDValue, SDValue> Result = - lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, + lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, LandingPad, true); SDNode *CallEnd = Result.second.getNode(); @@ -7196,26 +6566,24 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Add the <id> and <numBytes> constants. SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64)); SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl, + MVT::i32)); - // Assume that the Callee is a constant address. - // FIXME: handle function symbols in the future. - Ops.push_back( - DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(), - /*isTarget=*/true)); + // Add the callee. + Ops.push_back(Callee); // Adjust <numArgs> to account for any arguments that have been passed on the // stack instead. // Call Node: Chain, Target, {Args}, RegMask, [Glue] unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); NumCallRegArgs = IsAnyRegCC ? 
NumArgs : NumCallRegArgs; - Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32)); // Add the calling convention - Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32)); + Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32)); // Add the arguments we omitted previously. The register allocator should // place these in any free register. @@ -7225,11 +6593,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Push the arguments from the call instruction up to the register mask. SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; - for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) - Ops.push_back(*i); + Ops.append(Call->op_begin() + 2, e); // Push live variables for the stack map. - addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this); + addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this); // Push the register mask info. if (HasGlue) @@ -7262,7 +6629,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Replace the target specific call node with a PATCHPOINT node. MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, - getCurSDLoc(), NodeTys, Ops); + dl, NodeTys, Ops); // Update the NodeMap. if (HasDef) { @@ -7529,7 +6896,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, - CLI.DAG.getConstant(Offsets[i], PtrVT)); + CLI.DAG.getConstant(Offsets[i], CLI.DL, + PtrVT)); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, @@ -7849,7 +7217,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) { assert(i == InVals.size() && "Argument register count mismatch!"); // Finally, if the target has anything special to do, allow it to do so. - // FIXME: this should insert code into the DAG! EmitFunctionEntryCode(); } @@ -7866,8 +7233,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; - // Check successor nodes' PHI nodes that expect a constant to be available - // from this block. + // Check PHI nodes in successors that expect a value to be available from this + // block. for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { const BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa<PHINode>(SuccBB->begin())) continue; @@ -7954,3 +7321,816 @@ AddSuccessorMBB(const BasicBlock *BB, SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely)); return SuccMBB; } + +MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { + MachineFunction::iterator I = MBB; + if (++I == FuncInfo.MF->end()) + return nullptr; + return I; +} + +/// During lowering new call nodes can be created (such as memset, etc.). +/// Those will become new roots of the current DAG, but complications arise +/// when they are tail calls. In such cases, the call lowering will update +/// the root, but the builder still needs to know that a tail call has been +/// lowered in order to avoid generating an additional return. +void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { + // If the node is null, we do have a tail call. 
+ if (MaybeTC.getNode() != nullptr) + DAG.setRoot(MaybeTC); + else + HasTailCall = true; +} + +bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, + unsigned *TotalCases, unsigned First, + unsigned Last) { + assert(Last >= First); + assert(TotalCases[Last] >= TotalCases[First]); + + APInt LowCase = Clusters[First].Low->getValue(); + APInt HighCase = Clusters[Last].High->getValue(); + assert(LowCase.getBitWidth() == HighCase.getBitWidth()); + + // FIXME: A range of consecutive cases has 100% density, but only requires one + // comparison to lower. We should discriminate against such consecutive ranges + // in jump tables. + + uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100); + uint64_t Range = Diff + 1; + + uint64_t NumCases = + TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]); + + assert(NumCases < UINT64_MAX / 100); + assert(Range >= NumCases); + + return NumCases * 100 >= Range * MinJumpTableDensity; +} + +static inline bool areJTsAllowed(const TargetLowering &TLI) { + return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); +} + +bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, + unsigned First, unsigned Last, + const SwitchInst *SI, + MachineBasicBlock *DefaultMBB, + CaseCluster &JTCluster) { + assert(First <= Last); + + uint32_t Weight = 0; + unsigned NumCmps = 0; + std::vector<MachineBasicBlock*> Table; + DenseMap<MachineBasicBlock*, uint32_t> JTWeights; + for (unsigned I = First; I <= Last; ++I) { + assert(Clusters[I].Kind == CC_Range); + Weight += Clusters[I].Weight; + assert(Weight >= Clusters[I].Weight && "Weight overflow!"); + APInt Low = Clusters[I].Low->getValue(); + APInt High = Clusters[I].High->getValue(); + NumCmps += (Low == High) ? 1 : 2; + if (I != First) { + // Fill the gap between this and the previous cluster. + APInt PreviousHigh = Clusters[I - 1].High->getValue(); + assert(PreviousHigh.slt(Low)); + uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1; + for (uint64_t J = 0; J < Gap; J++) + Table.push_back(DefaultMBB); + } + uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; + for (uint64_t J = 0; J < ClusterSize; ++J) + Table.push_back(Clusters[I].MBB); + JTWeights[Clusters[I].MBB] += Clusters[I].Weight; + } + + unsigned NumDests = JTWeights.size(); + if (isSuitableForBitTests(NumDests, NumCmps, + Clusters[First].Low->getValue(), + Clusters[Last].High->getValue())) { + // Clusters[First..Last] should be lowered as bit tests instead. + return false; + } + + // Create the MBB that will load from and jump through the table. + // Note: We create it here, but it's not inserted into the function yet. + MachineFunction *CurMF = FuncInfo.MF; + MachineBasicBlock *JumpTableMBB = + CurMF->CreateMachineBasicBlock(SI->getParent()); + + // Add successors. Note: use table order for determinism. + SmallPtrSet<MachineBasicBlock *, 8> Done; + for (MachineBasicBlock *Succ : Table) { + if (Done.count(Succ)) + continue; + addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]); + Done.insert(Succ); + } + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) + ->createJumpTableIndex(Table); + + // Set up the jump table info. 
+ JumpTable JT(-1U, JTI, JumpTableMBB, nullptr); + JumpTableHeader JTH(Clusters[First].Low->getValue(), + Clusters[Last].High->getValue(), SI->getCondition(), + nullptr, false); + JTCases.push_back(JumpTableBlock(JTH, JT)); + + JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, + JTCases.size() - 1, Weight); + return true; +} + +void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, + const SwitchInst *SI, + MachineBasicBlock *DefaultMBB) { +#ifndef NDEBUG + // Clusters must be non-empty, sorted, and only contain Range clusters. + assert(!Clusters.empty()); + for (CaseCluster &C : Clusters) + assert(C.Kind == CC_Range); + for (unsigned i = 1, e = Clusters.size(); i < e; ++i) + assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue())); +#endif + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!areJTsAllowed(TLI)) + return; + + const int64_t N = Clusters.size(); + const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries(); + + // Split Clusters into minimum number of dense partitions. The algorithm uses + // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code + // for the Case Statement'" (1994), but builds the MinPartitions array in + // reverse order to make it easier to reconstruct the partitions in ascending + // order. In the choice between two optimal partitionings, it picks the one + // which yields more jump tables. + + // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. + SmallVector<unsigned, 8> MinPartitions(N); + // LastElement[i] is the last element of the partition starting at i. + SmallVector<unsigned, 8> LastElement(N); + // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1]. + SmallVector<unsigned, 8> NumTables(N); + // TotalCases[i]: Total nbr of cases in Clusters[0..i]. + SmallVector<unsigned, 8> TotalCases(N); + + for (unsigned i = 0; i < N; ++i) { + APInt Hi = Clusters[i].High->getValue(); + APInt Lo = Clusters[i].Low->getValue(); + TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; + if (i != 0) + TotalCases[i] += TotalCases[i - 1]; + } + + // Base case: There is only one way to partition Clusters[N-1]. + MinPartitions[N - 1] = 1; + LastElement[N - 1] = N - 1; + assert(MinJumpTableSize > 1); + NumTables[N - 1] = 0; + + // Note: loop indexes are signed to avoid underflow. + for (int64_t i = N - 2; i >= 0; i--) { + // Find optimal partitioning of Clusters[i..N-1]. + // Baseline: Put Clusters[i] into a partition on its own. + MinPartitions[i] = MinPartitions[i + 1] + 1; + LastElement[i] = i; + NumTables[i] = NumTables[i + 1]; + + // Search for a solution that results in fewer partitions. + for (int64_t j = N - 1; j > i; j--) { + // Try building a partition from Clusters[i..j]. + if (isDense(Clusters, &TotalCases[0], i, j)) { + unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); + bool IsTable = j - i + 1 >= MinJumpTableSize; + unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]); + + // If this j leads to fewer partitions, or same number of partitions + // with more lookup tables, it is a better partitioning. + if (NumPartitions < MinPartitions[i] || + (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) { + MinPartitions[i] = NumPartitions; + LastElement[i] = j; + NumTables[i] = Tables; + } + } + } + } + + // Iterate over the partitions, replacing some with jump tables in-place. 
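
The search above is a suffix DP in the style of the cited Kannan & Proebsting correction. A simplified standalone sketch of just the MinPartitions recurrence (illustrative; the NumTables tie-break that prefers partitionings with more jump tables is omitted):

    #include <algorithm>
    #include <functional>
    #include <vector>

    // MinPartitions[i] = fewest partitions covering clusters [i, N), where a
    // candidate partition [i, j] is admissible only if Dense(i, j) holds.
    std::vector<unsigned>
    minPartitions(unsigned N, const std::function<bool(unsigned, unsigned)> &Dense) {
      std::vector<unsigned> MinPartitions(N + 1, 0); // MinPartitions[N]: empty suffix
      for (int i = static_cast<int>(N) - 1; i >= 0; --i) {
        MinPartitions[i] = MinPartitions[i + 1] + 1; // baseline: cluster i alone
        for (unsigned j = i + 1; j < N; ++j)
          if (Dense(i, j)) // try the partition [i, j]
            MinPartitions[i] = std::min(MinPartitions[i], 1 + MinPartitions[j + 1]);
      }
      return MinPartitions;
    }
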
+ unsigned DstIndex = 0; + for (unsigned First = 0, Last; First < N; First = Last + 1) { + Last = LastElement[First]; + assert(Last >= First); + assert(DstIndex <= First); + unsigned NumClusters = Last - First + 1; + + CaseCluster JTCluster; + if (NumClusters >= MinJumpTableSize && + buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { + Clusters[DstIndex++] = JTCluster; + } else { + for (unsigned I = First; I <= Last; ++I) + std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); + } + } + Clusters.resize(DstIndex); +} + +bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { + // FIXME: Using the pointer type doesn't seem ideal. + uint64_t BW = DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); + uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; + return Range <= BW; +} + +bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests, + unsigned NumCmps, + const APInt &Low, + const APInt &High) { + // FIXME: I don't think NumCmps is the correct metric: a single case and a + // range of cases both require only one branch to lower. Just looking at the + // number of clusters and destinations should be enough to decide whether to + // build bit tests. + + // To lower a range with bit tests, the range must fit the bitwidth of a + // machine word. + if (!rangeFitsInWord(Low, High)) + return false; + + // Decide whether it's profitable to lower this range with bit tests. Each + // destination requires a bit test and branch, and there is an overall range + // check branch. For a small number of clusters, separate comparisons might be + // cheaper, and for many destinations, splitting the range might be better. + return (NumDests == 1 && NumCmps >= 3) || + (NumDests == 2 && NumCmps >= 5) || + (NumDests == 3 && NumCmps >= 6); +} + +bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, + unsigned First, unsigned Last, + const SwitchInst *SI, + CaseCluster &BTCluster) { + assert(First <= Last); + if (First == Last) + return false; + + BitVector Dests(FuncInfo.MF->getNumBlockIDs()); + unsigned NumCmps = 0; + for (int64_t I = First; I <= Last; ++I) { + assert(Clusters[I].Kind == CC_Range); + Dests.set(Clusters[I].MBB->getNumber()); + NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2; + } + unsigned NumDests = Dests.count(); + + APInt Low = Clusters[First].Low->getValue(); + APInt High = Clusters[Last].High->getValue(); + assert(Low.slt(High)); + + if (!isSuitableForBitTests(NumDests, NumCmps, Low, High)) + return false; + + APInt LowBound; + APInt CmpRange; + + const int BitWidth = + DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); + assert((High - Low + 1).sle(BitWidth) && "Case range must fit in bit mask!"); + + if (Low.isNonNegative() && High.slt(BitWidth)) { + // Optimize the case where all the case values fit in a + // word without having to subtract minValue. In this case, + // we can optimize away the subtraction. + LowBound = APInt::getNullValue(Low.getBitWidth()); + CmpRange = High; + } else { + LowBound = Low; + CmpRange = High - Low; + } + + CaseBitsVector CBV; + uint32_t TotalWeight = 0; + for (unsigned i = First; i <= Last; ++i) { + // Find the CaseBits for this destination. + unsigned j; + for (j = 0; j < CBV.size(); ++j) + if (CBV[j].BB == Clusters[i].MBB) + break; + if (j == CBV.size()) + CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0)); + CaseBits *CB = &CBV[j]; + + // Update Mask, Bits and ExtraWeight. 
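
The mask update that follows packs all case values of one destination into a single machine word, relative to LowBound. A standalone sketch with made-up values (illustrative only):

    #include <cstdint>
    #include <iostream>

    int main() {
      const uint64_t LowBound = 4;
      const uint64_t CaseVals[] = {4, 6, 9}; // cases sharing one destination
      uint64_t Mask = 0;
      for (uint64_t C : CaseVals)
        Mask |= 1ULL << (C - LowBound); // one bit per case value
      // The emitted test is then roughly:
      //   if ((1 << (X - LowBound)) & Mask) goto BB;
      std::cout << std::hex << "0x" << Mask << '\n'; // 0x25: bits 0, 2 and 5
    }
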
+ uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); + uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); + for (uint64_t j = Lo; j <= Hi; ++j) { + CB->Mask |= 1ULL << j; + CB->Bits++; + } + CB->ExtraWeight += Clusters[i].Weight; + TotalWeight += Clusters[i].Weight; + assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!"); + } + + BitTestInfo BTI; + std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { + // Sort by weight first, number of bits second. + if (a.ExtraWeight != b.ExtraWeight) + return a.ExtraWeight > b.ExtraWeight; + return a.Bits > b.Bits; + }); + + for (auto &CB : CBV) { + MachineBasicBlock *BitTestBB = + FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); + BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight)); + } + BitTestCases.push_back(BitTestBlock(LowBound, CmpRange, SI->getCondition(), + -1U, MVT::Other, false, nullptr, + nullptr, std::move(BTI))); + + BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, + BitTestCases.size() - 1, TotalWeight); + return true; +} + +void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, + const SwitchInst *SI) { +// Partition Clusters into as few subsets as possible, where each subset has a +// range that fits in a machine word and has <= 3 unique destinations. + +#ifndef NDEBUG + // Clusters must be sorted and contain Range or JumpTable clusters. + assert(!Clusters.empty()); + assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable); + for (const CaseCluster &C : Clusters) + assert(C.Kind == CC_Range || C.Kind == CC_JumpTable); + for (unsigned i = 1; i < Clusters.size(); ++i) + assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue())); +#endif + + // If target does not have legal shift left, do not emit bit tests at all. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PTy = TLI.getPointerTy(); + if (!TLI.isOperationLegal(ISD::SHL, PTy)) + return; + + int BitWidth = PTy.getSizeInBits(); + const int64_t N = Clusters.size(); + + // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. + SmallVector<unsigned, 8> MinPartitions(N); + // LastElement[i] is the last element of the partition starting at i. + SmallVector<unsigned, 8> LastElement(N); + + // FIXME: This might not be the best algorithm for finding bit test clusters. + + // Base case: There is only one way to partition Clusters[N-1]. + MinPartitions[N - 1] = 1; + LastElement[N - 1] = N - 1; + + // Note: loop indexes are signed to avoid underflow. + for (int64_t i = N - 2; i >= 0; --i) { + // Find optimal partitioning of Clusters[i..N-1]. + // Baseline: Put Clusters[i] into a partition on its own. + MinPartitions[i] = MinPartitions[i + 1] + 1; + LastElement[i] = i; + + // Search for a solution that results in fewer partitions. + // Note: the search is limited by BitWidth, reducing time complexity. + for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) { + // Try building a partition from Clusters[i..j]. + + // Check the range. + if (!rangeFitsInWord(Clusters[i].Low->getValue(), + Clusters[j].High->getValue())) + continue; + + // Check nbr of destinations and cluster types. + // FIXME: This works, but doesn't seem very efficient. 
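
For reference, the profitability rule from isSuitableForBitTests() earlier in this hunk, restated on its own (the rangeFitsInWord() precondition is elided); its three-destination cap is mirrored by the Dests.count() > 3 cutoff in the partition search below:

    // Bit tests pay off once several comparisons collapse onto few destinations:
    // one destination needs 3+ comparisons, two need 5+, three need 6+.
    bool suitableForBitTests(unsigned NumDests, unsigned NumCmps) {
      return (NumDests == 1 && NumCmps >= 3) ||
             (NumDests == 2 && NumCmps >= 5) ||
             (NumDests == 3 && NumCmps >= 6);
    }
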
+ bool RangesOnly = true;
+ BitVector Dests(FuncInfo.MF->getNumBlockIDs());
+ for (int64_t k = i; k <= j; k++) {
+ if (Clusters[k].Kind != CC_Range) {
+ RangesOnly = false;
+ break;
+ }
+ Dests.set(Clusters[k].MBB->getNumber());
+ }
+ if (!RangesOnly || Dests.count() > 3)
+ break;
+
+ // Check if it's a better partition.
+ unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
+ if (NumPartitions < MinPartitions[i]) {
+ // Found a better partition.
+ MinPartitions[i] = NumPartitions;
+ LastElement[i] = j;
+ }
+ }
+ }
+
+ // Iterate over the partitions, replacing with bit-test clusters in-place.
+ unsigned DstIndex = 0;
+ for (unsigned First = 0, Last; First < N; First = Last + 1) {
+ Last = LastElement[First];
+ assert(First <= Last);
+ assert(DstIndex <= First);
+
+ CaseCluster BitTestCluster;
+ if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
+ Clusters[DstIndex++] = BitTestCluster;
+ } else {
+ for (unsigned I = First; I <= Last; ++I)
+ std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
+ }
+ }
+ Clusters.resize(DstIndex);
+}
+
+void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB) {
+ MachineFunction *CurMF = FuncInfo.MF;
+ MachineBasicBlock *NextMBB = nullptr;
+ MachineFunction::iterator BBI = W.MBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextMBB = BBI;
+
+ unsigned Size = W.LastCluster - W.FirstCluster + 1;
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+
+ if (Size == 2 && W.MBB == SwitchMBB) {
+ // If any two of the cases have the same destination, and if one value
+ // is the same as the other, but has one bit unset that the other has set,
+ // use bit manipulation to do two compares at once. For example:
+ // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+ // TODO: This could be extended to merge any 2 cases in switches with 3
+ // cases.
+ // TODO: Handle cases where W.CaseBB != SwitchBB.
+ CaseCluster &Small = *W.FirstCluster;
+ CaseCluster &Big = *W.LastCluster;
+
+ if (Small.Low == Small.High && Big.Low == Big.High &&
+ Small.MBB == Big.MBB) {
+ const APInt &SmallValue = Small.Low->getValue();
+ const APInt &BigValue = Big.Low->getValue();
+
+ // Check that there is only one bit different.
+ if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
+ (SmallValue | BigValue) == BigValue) {
+ // Isolate the common bit.
+ APInt CommonBit = BigValue & ~SmallValue;
+ assert((SmallValue | CommonBit) == BigValue &&
+ CommonBit.countPopulation() == 1 && "Not a common bit?");
+
+ SDValue CondLHS = getValue(Cond);
+ EVT VT = CondLHS.getValueType();
+ SDLoc DL = getCurSDLoc();
+
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+ DAG.getConstant(CommonBit, DL, VT));
+ SDValue Cond = DAG.getSetCC(DL, MVT::i1, Or,
+ DAG.getConstant(BigValue, DL, VT),
+ ISD::SETEQ);
+
+ // Update successor info.
+ // Both Small and Big will jump to Small.BB, so we sum up the weights.
+ addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight);
+ addSuccessorWithWeight(
+ SwitchMBB, DefaultMBB,
+ // The default destination is the first successor in IR.
+ BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0)
+ : 0);
+
+ // Insert the true branch.
+ SDValue BrCond =
+ DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(Small.MBB));
+ // Insert the false branch.
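
A standalone check of the single-bit merge above, using the 4/6 example from the comment (illustrative; matches() is not part of the patch):

    #include <cassert>

    // (X | 2) == 6 folds the two equality tests X == 4 and X == 6 into one,
    // since 4 and 6 differ only in bit 1.
    bool matches(unsigned X) { return (X | 2) == 6; }

    int main() {
      assert(matches(4) && matches(6));
      assert(!matches(2) && !matches(5) && !matches(7));
    }
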
+ BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+ DAG.getBasicBlock(DefaultMBB));
+
+ DAG.setRoot(BrCond);
+ return;
+ }
+ }
+ }
+
+ if (TM.getOptLevel() != CodeGenOpt::None) {
+ // Order cases by weight so the most likely case will be checked first.
+ std::sort(W.FirstCluster, W.LastCluster + 1,
+ [](const CaseCluster &a, const CaseCluster &b) {
+ return a.Weight > b.Weight;
+ });
+
+ // Rearrange the case blocks so that the last one falls through if possible
+ // without changing the order of weights.
+ for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
+ --I;
+ if (I->Weight > W.LastCluster->Weight)
+ break;
+ if (I->Kind == CC_Range && I->MBB == NextMBB) {
+ std::swap(*I, *W.LastCluster);
+ break;
+ }
+ }
+ }
+
+ // Compute total weight.
+ uint32_t UnhandledWeights = 0;
+ for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) {
+ UnhandledWeights += I->Weight;
+ assert(UnhandledWeights >= I->Weight && "Weight overflow!");
+ }
+
+ MachineBasicBlock *CurMBB = W.MBB;
+ for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
+ MachineBasicBlock *Fallthrough;
+ if (I == W.LastCluster) {
+ // For the last cluster, fall through to the default destination.
+ Fallthrough = DefaultMBB;
+ } else {
+ Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
+ CurMF->insert(BBI, Fallthrough);
+ // Put Cond in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(Cond);
+ }
+
+ switch (I->Kind) {
+ case CC_JumpTable: {
+ // FIXME: Optimize away range check based on pivot comparisons.
+ JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first;
+ JumpTable *JT = &JTCases[I->JTCasesIndex].second;
+
+ // The jump block hasn't been inserted yet; insert it here.
+ MachineBasicBlock *JumpMBB = JT->MBB;
+ CurMF->insert(BBI, JumpMBB);
+ addSuccessorWithWeight(CurMBB, Fallthrough);
+ addSuccessorWithWeight(CurMBB, JumpMBB);
+
+ // The jump table header will be inserted in our current block, do the
+ // range check, and fall through to our fallthrough block.
+ JTH->HeaderBB = CurMBB;
+ JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
+
+ // If we're in the right place, emit the jump table header right now.
+ if (CurMBB == SwitchMBB) {
+ visitJumpTableHeader(*JT, *JTH, SwitchMBB);
+ JTH->Emitted = true;
+ }
+ break;
+ }
+ case CC_BitTests: {
+ // FIXME: Optimize away range check based on pivot comparisons.
+ BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex];
+
+ // The bit test blocks haven't been inserted yet; insert them here.
+ for (BitTestCase &BTC : BTB->Cases)
+ CurMF->insert(BBI, BTC.ThisBB);
+
+ // Fill in fields of the BitTestBlock.
+ BTB->Parent = CurMBB;
+ BTB->Default = Fallthrough;
+
+ // If we're in the right place, emit the bit test header right now.
+ if (CurMBB == SwitchMBB) {
+ visitBitTestHeader(*BTB, SwitchMBB);
+ BTB->Emitted = true;
+ }
+ break;
+ }
+ case CC_Range: {
+ const Value *RHS, *LHS, *MHS;
+ ISD::CondCode CC;
+ if (I->Low == I->High) {
+ // Check Cond == I->Low.
+ CC = ISD::SETEQ;
+ LHS = Cond;
+ RHS = I->Low;
+ MHS = nullptr;
+ } else {
+ // Check I->Low <= Cond <= I->High.
+ CC = ISD::SETLE;
+ LHS = I->Low;
+ MHS = Cond;
+ RHS = I->High;
+ }
+
+ // The false weight is the sum of all unhandled cases.
+ UnhandledWeights -= I->Weight; + CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight, + UnhandledWeights); + + if (CurMBB == SwitchMBB) + visitSwitchCase(CB, SwitchMBB); + else + SwitchCases.push_back(CB); + + break; + } + } + CurMBB = Fallthrough; + } +} + +void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, + const SwitchWorkListItem &W, + Value *Cond, + MachineBasicBlock *SwitchMBB) { + assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && + "Clusters not sorted?"); + + assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); + + // Balance the tree based on branch weights to create a near-optimal (in terms + // of search time given key frequency) binary search tree. See e.g. Kurt + // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). + CaseClusterIt LastLeft = W.FirstCluster; + CaseClusterIt FirstRight = W.LastCluster; + uint32_t LeftWeight = LastLeft->Weight; + uint32_t RightWeight = FirstRight->Weight; + + // Move LastLeft and FirstRight towards each other from opposite directions to + // find a partitioning of the clusters which balances the weight on both + // sides. If LeftWeight and RightWeight are equal, alternate which side is + // taken to ensure 0-weight nodes are distributed evenly. + unsigned I = 0; + while (LastLeft + 1 < FirstRight) { + if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1))) + LeftWeight += (++LastLeft)->Weight; + else + RightWeight += (--FirstRight)->Weight; + I++; + } + assert(LastLeft + 1 == FirstRight); + assert(LastLeft >= W.FirstCluster); + assert(FirstRight <= W.LastCluster); + + // Use the first element on the right as pivot since we will make less-than + // comparisons against it. + CaseClusterIt PivotCluster = FirstRight; + assert(PivotCluster > W.FirstCluster); + assert(PivotCluster <= W.LastCluster); + + CaseClusterIt FirstLeft = W.FirstCluster; + CaseClusterIt LastRight = W.LastCluster; + + const ConstantInt *Pivot = PivotCluster->Low; + + // New blocks will be inserted immediately after the current one. + MachineFunction::iterator BBI = W.MBB; + ++BBI; + + // We will branch to the LHS if Value < Pivot. If LHS is a single cluster, + // we can branch to its destination directly if it's squeezed exactly in + // between the known lower bound and Pivot - 1. + MachineBasicBlock *LeftMBB; + if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range && + FirstLeft->Low == W.GE && + (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) { + LeftMBB = FirstLeft->MBB; + } else { + LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); + FuncInfo.MF->insert(BBI, LeftMBB); + WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot}); + // Put Cond in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(Cond); + } + + // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a + // single cluster, RHS.Low == Pivot, and we can branch to its destination + // directly if RHS.High equals the current upper bound. + MachineBasicBlock *RightMBB; + if (FirstRight == LastRight && FirstRight->Kind == CC_Range && + W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) { + RightMBB = FirstRight->MBB; + } else { + RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); + FuncInfo.MF->insert(BBI, RightMBB); + WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT}); + // Put Cond in a virtual register to make it available from the new blocks. 
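
The pivot search in splitWorkItem() above reads well in isolation: walk inward from both ends, always growing the lighter side. A standalone sketch (illustrative; assumes at least two clusters):

    #include <cstdint>
    #include <vector>

    // Returns the index of the first cluster of the right subtree. Ties
    // alternate sides so that runs of 0-weight clusters split evenly.
    size_t pickPivot(const std::vector<uint32_t> &Weights) {
      size_t LastLeft = 0, FirstRight = Weights.size() - 1; // needs size() >= 2
      uint64_t LeftWeight = Weights[LastLeft], RightWeight = Weights[FirstRight];
      for (unsigned I = 0; LastLeft + 1 < FirstRight; ++I) {
        if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1)))
          LeftWeight += Weights[++LastLeft];
        else
          RightWeight += Weights[--FirstRight];
      }
      return FirstRight;
    }
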
+ ExportFromCurrentBlock(Cond); + } + + // Create the CaseBlock record that will be used to lower the branch. + CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, + LeftWeight, RightWeight); + + if (W.MBB == SwitchMBB) + visitSwitchCase(CB, SwitchMBB); + else + SwitchCases.push_back(CB); +} + +void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { + // Extract cases from the switch. + BranchProbabilityInfo *BPI = FuncInfo.BPI; + CaseClusterVector Clusters; + Clusters.reserve(SI.getNumCases()); + for (auto I : SI.cases()) { + MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; + const ConstantInt *CaseVal = I.getCaseValue(); + uint32_t Weight = + BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0; + Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight)); + } + + MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; + + // Cluster adjacent cases with the same destination. We do this at all + // optimization levels because it's cheap to do and will make codegen faster + // if there are many clusters. + sortAndRangeify(Clusters); + + if (TM.getOptLevel() != CodeGenOpt::None) { + // Replace an unreachable default with the most popular destination. + // FIXME: Exploit unreachable default more aggressively. + bool UnreachableDefault = + isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()); + if (UnreachableDefault && !Clusters.empty()) { + DenseMap<const BasicBlock *, unsigned> Popularity; + unsigned MaxPop = 0; + const BasicBlock *MaxBB = nullptr; + for (auto I : SI.cases()) { + const BasicBlock *BB = I.getCaseSuccessor(); + if (++Popularity[BB] > MaxPop) { + MaxPop = Popularity[BB]; + MaxBB = BB; + } + } + // Set new default. + assert(MaxPop > 0 && MaxBB); + DefaultMBB = FuncInfo.MBBMap[MaxBB]; + + // Remove cases that were pointing to the destination that is now the + // default. + CaseClusterVector New; + New.reserve(Clusters.size()); + for (CaseCluster &CC : Clusters) { + if (CC.MBB != DefaultMBB) + New.push_back(CC); + } + Clusters = std::move(New); + } + } + + // If there is only the default destination, jump there directly. + MachineBasicBlock *SwitchMBB = FuncInfo.MBB; + if (Clusters.empty()) { + SwitchMBB->addSuccessor(DefaultMBB); + if (DefaultMBB != NextBlock(SwitchMBB)) { + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(DefaultMBB))); + } + return; + } + + if (TM.getOptLevel() != CodeGenOpt::None) { + findJumpTables(Clusters, &SI, DefaultMBB); + findBitTestClusters(Clusters, &SI); + } + + + DEBUG({ + dbgs() << "Case clusters: "; + for (const CaseCluster &C : Clusters) { + if (C.Kind == CC_JumpTable) dbgs() << "JT:"; + if (C.Kind == CC_BitTests) dbgs() << "BT:"; + + C.Low->getValue().print(dbgs(), true); + if (C.Low != C.High) { + dbgs() << '-'; + C.High->getValue().print(dbgs(), true); + } + dbgs() << ' '; + } + dbgs() << '\n'; + }); + + assert(!Clusters.empty()); + SwitchWorkList WorkList; + CaseClusterIt First = Clusters.begin(); + CaseClusterIt Last = Clusters.end() - 1; + WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr}); + + while (!WorkList.empty()) { + SwitchWorkListItem W = WorkList.back(); + WorkList.pop_back(); + unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; + + if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) { + // For optimized builds, lower large range as a balanced binary tree. 
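
visitSwitch() above leans on sortAndRangeify(), declared in the SelectionDAGBuilder.h hunk below. A standalone sketch of that pre-pass under simplified types; the merge condition (adjacent values, same destination) and the weight accumulation are this sketch's reading, not a copy of the implementation:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Cluster { int64_t Low, High; int Dest; uint32_t Weight; };

    // Sort clusters by low value, then merge neighbours that touch and share a
    // destination, so "case 1: case 2: case 3: goto A" becomes one range.
    void sortAndRangeify(std::vector<Cluster> &Cs) {
      std::sort(Cs.begin(), Cs.end(),
                [](const Cluster &a, const Cluster &b) { return a.Low < b.Low; });
      size_t Dst = 0;
      for (const Cluster &C : Cs) {
        if (Dst > 0 && Cs[Dst - 1].Dest == C.Dest && Cs[Dst - 1].High + 1 == C.Low) {
          Cs[Dst - 1].High = C.High;      // extend the previous range
          Cs[Dst - 1].Weight += C.Weight; // accumulate branch weight
        } else {
          Cs[Dst++] = C;
        }
      }
      Cs.resize(Dst);
    }
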
+ splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); + continue; + } + + lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB); + } +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 9070091..f0c03af 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -17,9 +17,11 @@ #include "StatepointLowering.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Statepoint.h" #include "llvm/IR/Constants.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLowering.h" @@ -133,26 +135,65 @@ private: /// SDNodes we create. unsigned SDNodeOrder; - /// Case - A struct to record the Value for a switch case, and the - /// case's target basic block. - struct Case { - const Constant *Low; - const Constant *High; - MachineBasicBlock* BB; - uint32_t ExtraWeight; + enum CaseClusterKind { + /// A cluster of adjacent case labels with the same destination, or just one + /// case. + CC_Range, + /// A cluster of cases suitable for jump table lowering. + CC_JumpTable, + /// A cluster of cases suitable for bit test lowering. + CC_BitTests + }; - Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { } - Case(const Constant *low, const Constant *high, MachineBasicBlock *bb, - uint32_t extraweight) : Low(low), High(high), BB(bb), - ExtraWeight(extraweight) { } + /// A cluster of case labels. + struct CaseCluster { + CaseClusterKind Kind; + const ConstantInt *Low, *High; + union { + MachineBasicBlock *MBB; + unsigned JTCasesIndex; + unsigned BTCasesIndex; + }; + uint32_t Weight; + + static CaseCluster range(const ConstantInt *Low, const ConstantInt *High, + MachineBasicBlock *MBB, uint32_t Weight) { + CaseCluster C; + C.Kind = CC_Range; + C.Low = Low; + C.High = High; + C.MBB = MBB; + C.Weight = Weight; + return C; + } - APInt size() const { - const APInt &rHigh = cast<ConstantInt>(High)->getValue(); - const APInt &rLow = cast<ConstantInt>(Low)->getValue(); - return (rHigh - rLow + 1ULL); + static CaseCluster jumpTable(const ConstantInt *Low, + const ConstantInt *High, unsigned JTCasesIndex, + uint32_t Weight) { + CaseCluster C; + C.Kind = CC_JumpTable; + C.Low = Low; + C.High = High; + C.JTCasesIndex = JTCasesIndex; + C.Weight = Weight; + return C; + } + + static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High, + unsigned BTCasesIndex, uint32_t Weight) { + CaseCluster C; + C.Kind = CC_BitTests; + C.Low = Low; + C.High = High; + C.BTCasesIndex = BTCasesIndex; + C.Weight = Weight; + return C; } }; + typedef std::vector<CaseCluster> CaseClusterVector; + typedef CaseClusterVector::iterator CaseClusterIt; + struct CaseBits { uint64_t Mask; MachineBasicBlock* BB; @@ -162,51 +203,14 @@ private: CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, uint32_t Weight): Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { } - }; - - typedef std::vector<Case> CaseVector; - typedef std::vector<CaseBits> CaseBitsVector; - typedef CaseVector::iterator CaseItr; - typedef std::pair<CaseItr, CaseItr> CaseRange; - - /// CaseRec - A struct with ctor used in lowering switches to a binary tree - /// of conditional branches. 
- struct CaseRec { - CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge, - CaseRange r) : - CaseBB(bb), LT(lt), GE(ge), Range(r) {} - - /// CaseBB - The MBB in which to emit the compare and branch - MachineBasicBlock *CaseBB; - /// LT, GE - If nonzero, we know the current case value must be less-than or - /// greater-than-or-equal-to these Constants. - const Constant *LT; - const Constant *GE; - /// Range - A pair of iterators representing the range of case values to be - /// processed at this point in the binary search tree. - CaseRange Range; - }; - typedef std::vector<CaseRec> CaseRecVector; - - /// The comparison function for sorting the switch case values in the vector. - /// WARNING: Case ranges should be disjoint! - struct CaseCmp { - bool operator()(const Case &C1, const Case &C2) { - assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High)); - const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); - const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); - return CI1->getValue().slt(CI2->getValue()); - } + CaseBits() : Mask(0), BB(nullptr), Bits(0), ExtraWeight(0) {} }; - struct CaseBitsCmp { - bool operator()(const CaseBits &C1, const CaseBits &C2) { - return C1.Bits > C2.Bits; - } - }; + typedef std::vector<CaseBits> CaseBitsVector; - void Clusterify(CaseVector &Cases, const SwitchInst &SI); + /// Sort Clusters and merge adjacent cases. + void sortAndRangeify(CaseClusterVector &Clusters); /// CaseBlock - This structure is used to communicate between /// SelectionDAGBuilder and SDISel for the code generation of additional basic @@ -296,6 +300,58 @@ private: BitTestInfo Cases; }; + /// Minimum jump table density, in percent. + enum { MinJumpTableDensity = 40 }; + + /// Check whether a range of clusters is dense enough for a jump table. + bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases, + unsigned First, unsigned Last); + + /// Build a jump table cluster from Clusters[First..Last]. Returns false if it + /// decides it's not a good idea. + bool buildJumpTable(CaseClusterVector &Clusters, unsigned First, + unsigned Last, const SwitchInst *SI, + MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster); + + /// Find clusters of cases suitable for jump table lowering. + void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI, + MachineBasicBlock *DefaultMBB); + + /// Check whether the range [Low,High] fits in a machine word. + bool rangeFitsInWord(const APInt &Low, const APInt &High); + + /// Check whether these clusters are suitable for lowering with bit tests based + /// on the number of destinations, comparison metric, and range. + bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, + const APInt &Low, const APInt &High); + + /// Build a bit test cluster from Clusters[First..Last]. Returns false if it + /// decides it's not a good idea. + bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last, + const SwitchInst *SI, CaseCluster &BTCluster); + + /// Find clusters of cases suitable for bit test lowering. + void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI); + + struct SwitchWorkListItem { + MachineBasicBlock *MBB; + CaseClusterIt FirstCluster; + CaseClusterIt LastCluster; + const ConstantInt *GE; + const ConstantInt *LT; + }; + typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList; + + /// Emit comparison and split W into two subtrees. 
+ void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W,
+ Value *Cond, MachineBasicBlock *SwitchMBB);
+
+ /// Lower W.
+ void lowerWorkItem(SwitchWorkListItem W, Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB);
+
+
 /// A class which encapsulates all of the information needed to generate a
 /// stack protector check and signals to isel via its state being initialized
 /// that a stack protector needs to be generated.
@@ -405,7 +461,6 @@ private:
 StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr),
 FailureMBB(nullptr), Guard(nullptr),
 GuardReg(0) { }
- ~StackProtectorDescriptor() { }
 /// Returns true if all fields of the stack protector descriptor are
 /// initialized implying that we should/are ready to emit a stack protector.
@@ -605,10 +660,16 @@ public:
 void visit(unsigned Opcode, const User &I);
+ /// getCopyFromRegs - If there was a virtual register allocated for the value
+ /// V, emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
+ SDValue getCopyFromRegs(const Value *V, Type *Ty);
+
 // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
 // generate the debug data structures now that we've seen its definition.
 void resolveDanglingDebugInfo(const Value *V, SDValue Val);
 SDValue getValue(const Value *V);
+ bool findValue(const Value *V) const;
+
 SDValue getNonRegisterValue(const Value *V);
 SDValue getValueImpl(const Value *V);
@@ -618,13 +679,6 @@ public:
 N = NewN;
 }
- void removeValue(const Value *V) {
- // This is to support hack in lowerCallFromStatepoint
- // Should be removed when hack is resolved
- if (NodeMap.count(V))
- NodeMap.erase(V);
- }
-
 void setUnusedArgValue(const Value *V, SDValue NewN) {
 SDValue &N = UnusedArgNodeMap[V];
 assert(!N.getNode() && "Already set a value for this node!");
@@ -652,7 +706,7 @@ public:
 unsigned ArgIdx,
 unsigned NumArgs,
 SDValue Callee,
- bool UseVoidTy = false,
+ Type *ReturnTy,
 MachineBasicBlock *LandingPad = nullptr,
 bool IsPatchPoint = false);
@@ -660,6 +714,10 @@ public:
 /// references that need to refer to the last resulting block.
 void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+ // This function is responsible for the whole statepoint lowering process.
+ // It uniformly handles invoke and call statepoints.
+ void LowerStatepoint(ImmutableStatepoint Statepoint,
+ MachineBasicBlock *LandingPad = nullptr);
 private:
 std::pair<SDValue, SDValue> lowerInvokable(
 TargetLowering::CallLoweringInfo &CLI,
 MachineBasicBlock *LandingPad);
@@ -672,27 +730,6 @@ private:
 void visitIndirectBr(const IndirectBrInst &I);
 void visitUnreachable(const UnreachableInst &I);
- // Helpers for visitSwitch
- bool handleSmallSwitchRange(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock* Default,
- MachineBasicBlock *SwitchBB);
- bool handleJTSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock* Default,
- MachineBasicBlock *SwitchBB);
- bool handleBTSplitSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock *SwitchBB);
- bool handleBitTestsSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock* Default,
- MachineBasicBlock *SwitchBB);
-
 uint32_t getEdgeWeight(const MachineBasicBlock *Src,
 const MachineBasicBlock *Dst) const;
 void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
@@ -713,6 +750,8 @@ public:
 void visitJumpTable(JumpTable &JT);
 void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
 MachineBasicBlock *SwitchBB);
+ unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV,
+ MachineBasicBlock *LPadMBB);
 private:
 // These all get lowered before this pass.
@@ -772,6 +811,8 @@ private:
 void visitStore(const StoreInst &I);
 void visitMaskedLoad(const CallInst &I);
 void visitMaskedStore(const CallInst &I);
+ void visitMaskedGather(const CallInst &I);
+ void visitMaskedScatter(const CallInst &I);
 void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
 void visitAtomicRMW(const AtomicRMWInst &I);
 void visitFence(const FenceInst &I);
@@ -820,9 +861,91 @@ private:
 /// EmitFuncArgumentDbgValue - If V is a function argument then create
 /// corresponding DBG_VALUE machine instruction for it now. At the end of
 /// instruction selection, they will be inserted into the entry BB.
- bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr,
+ bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable,
+ DIExpression *Expr, DILocation *DL,
 int64_t Offset, bool IsIndirect,
 const SDValue &N);
+
+ /// Return the next block after MBB, or nullptr if there is none.
+ MachineBasicBlock *NextBlock(MachineBasicBlock *MBB);
+
+ /// Update the DAG and DAG builder with the relevant information after
+ /// a new root node has been created which could be a tail call.
+ void updateDAGForMaybeTailCall(SDValue MaybeTC);
+};
+
+/// RegsForValue - This struct represents the registers (physical or virtual)
+/// that a particular set of values is assigned, and the type information about
+/// the value. The most common situation is to represent one value at a time,
+/// but struct or array values are handled element-wise as multiple values. The
+/// splitting of aggregates is performed recursively, so that we never have
+/// aggregate-typed registers. The values at this point do not necessarily have
+/// legal types, so each value may require one or more registers of some legal
+/// type.
+///
+struct RegsForValue {
+ /// ValueVTs - The value types of the values, which may not be legal, and
+ /// may need to be promoted or synthesized from one or more registers.
+ ///
+ SmallVector<EVT, 4> ValueVTs;
+
+ /// RegVTs - The value types of the registers. This is the same size as
+ /// ValueVTs and it records, for each value, what the type of the assigned
+ /// register or registers is.
(Individual values are never synthesized
+ /// from more than one type of register.)
+ ///
+ /// With virtual registers, the contents of RegVTs are redundant with TLI's
+ /// getRegisterType member function; however, with physical registers
+ /// it is necessary to have a separate record of the types.
+ ///
+ SmallVector<MVT, 4> RegVTs;
+
+ /// Regs - This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each
+ /// expanded value requires multiple registers.
+ ///
+ SmallVector<unsigned, 4> Regs;
+
+ RegsForValue();
+
+ RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt);
+
+ RegsForValue(LLVMContext &Context, const TargetLowering &tli, unsigned Reg,
+ Type *Ty);
+
+ /// append - Add the specified values to this one.
+ void append(const RegsForValue &RHS) {
+ ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+ RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+ Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ }
+
+ /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copy from
+ /// this value and return the result as a ValueVTs value. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is nullptr, no flag is used.
+ SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
+ SDLoc dl,
+ SDValue &Chain, SDValue *Flag,
+ const Value *V = nullptr) const;
+
+ /// getCopyToRegs - Emit a series of CopyToReg nodes that copy the specified
+ /// value into the registers specified by this object. This uses Chain/Flag
+ /// as the input and updates them for the output Chain/Flag. If the Flag
+ /// pointer is nullptr, no flag is used. If V is not nullptr, then it is used
+ /// in printing better diagnostic messages on error.
+ void
+ getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain,
+ SDValue *Flag, const Value *V = nullptr,
+ ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
+
+ /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+ /// operand list. This adds the code marker, matching input operand index
+ /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Kind, + bool HasMatching, unsigned MatchingIdx, SDLoc dl, + SelectionDAG &DAG, + std::vector<SDValue> &Ops) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index e8577d8..96ee899 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -95,6 +95,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_ALLOC_RECOVER: return "FRAME_ALLOC_RECOVER"; case ISD::READ_REGISTER: return "READ_REGISTER"; case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; @@ -187,10 +188,15 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FMUL: return "fmul"; case ISD::FDIV: return "fdiv"; case ISD::FMA: return "fma"; + case ISD::FMAD: return "fmad"; case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; case ISD::FPOW: return "fpow"; + case ISD::SMIN: return "smin"; + case ISD::SMAX: return "smax"; + case ISD::UMIN: return "umin"; + case ISD::UMAX: return "umax"; case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; @@ -271,6 +277,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STORE: return "store"; case ISD::MLOAD: return "masked_load"; case ISD::MSTORE: return "masked_store"; + case ISD::MGATHER: return "masked_gather"; + case ISD::MSCATTER: return "masked_scatter"; case ISD::VAARG: return "vaarg"; case ISD::VACOPY: return "vacopy"; case ISD::VAEND: return "vaend"; @@ -284,6 +292,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::DEBUGTRAP: return "debugtrap"; case ISD::LIFETIME_START: return "lifetime.start"; case ISD::LIFETIME_END: return "lifetime.end"; + case ISD::GC_TRANSITION_START: return "gc_transition.start"; + case ISD::GC_TRANSITION_END: return "gc_transition.end"; // Bit manipulation case ISD::BSWAP: return "bswap"; @@ -518,22 +528,20 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getNodeId() != -1) OS << " [ID=" << getNodeId() << ']'; - DebugLoc dl = getDebugLoc(); - if (G && !dl.isUnknown()) { - DIScope - Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); - OS << " dbg:"; - assert((!Scope || Scope.isScope()) && - "Scope of a DebugLoc should be null or a DIScope."); - // Omit the directory, since it's usually long and uninteresting. 
- if (Scope)
- OS << Scope.getFilename();
- else
- OS << "<unknown>";
- OS << ':' << dl.getLine();
- if (dl.getCol() != 0)
- OS << ':' << dl.getCol();
- }
+ if (!G)
+ return;
+
+ DILocation *L = getDebugLoc();
+ if (!L)
+ return;
+
+ if (auto *Scope = L->getScope())
+ OS << Scope->getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << L->getLine();
+ if (unsigned C = L->getColumn())
+ OS << ':' << C;
}
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index ef54525..22f592a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -11,7 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
 #include "ScheduleDAGSDNodes.h"
 #include "SelectionDAGBuilder.h"
 #include "llvm/ADT/PostOrderIterator.h"
@@ -19,10 +19,11 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -31,6 +32,8 @@
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
@@ -40,6 +43,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -47,7 +51,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -166,14 +169,13 @@ static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
 cl::desc("Enable verbose messages in the \"fast\" "
 "instruction selector"));
-static cl::opt<bool>
-EnableFastISelAbort("fast-isel-abort", cl::Hidden,
- cl::desc("Enable abort calls when \"fast\" instruction selection "
- "fails to lower an instruction"));
-static cl::opt<bool>
-EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden,
- cl::desc("Enable abort calls when \"fast\" instruction selection "
- "fails to lower a formal argument"));
+static cl::opt<int> EnableFastISelAbort(
+ "fast-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction selection "
+ "fails to lower an instruction: 0 disables the abort, 1 will "
+ "abort for everything but args, calls and terminators, 2 will "
+ "also abort for argument lowering, and 3 will never fall back "
+ "to SelectionDAG."));
static cl::opt<bool>
UseMBPI("use-mbpi",
@@ -291,7 +293,8 @@ namespace llvm {
 const TargetLowering *TLI = IS->TLI;
 const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
- if (OptLevel ==
CodeGenOpt::None || ST.useMachineScheduler() || + if (OptLevel == CodeGenOpt::None || + (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) || TLI->getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); if (TLI->getSchedulingPreference() == Sched::RegPressure) @@ -349,7 +352,8 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); - initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { @@ -363,7 +367,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); - AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -376,7 +380,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { /// /// This is required for correctness, so it must be done at -O0. /// -static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { +static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { // Loop for blocks with phi nodes. for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { PHINode *PN = dyn_cast<PHINode>(BB->begin()); @@ -400,8 +404,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { continue; // Okay, we have to split this edge. - SplitCriticalEdge(Pred->getTerminator(), - GetSuccessorNumber(Pred, BB), SDISel, true); + SplitCriticalEdge( + Pred->getTerminator(), GetSuccessorNumber(Pred, BB), + CriticalEdgeSplittingOptions(AA).setMergeIdenticalEdges()); goto ReprocessBlock; } } @@ -412,7 +417,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && - "-fast-isel-abort requires -fast-isel"); + "-fast-isel-abort > 0 requires -fast-isel"); const Function &Fn = *mf.getFunction(); MF = &mf; @@ -433,12 +438,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); - LibInfo = &getAnalysis<TargetLibraryInfo>(); + LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); + SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), AA); CurDAG->init(*MF); FuncInfo->set(Fn, *MF, CurDAG); @@ -496,12 +501,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock::iterator InsertPos = Def; const MDNode *Variable = MI->getDebugVariable(); const MDNode *Expr = MI->getDebugExpression(); + DebugLoc DL = MI->getDebugLoc(); bool IsIndirect = MI->isIndirectDebugValue(); unsigned Offset = IsIndirect ? 
MI->getOperand(1).getImm() : 0; + assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); // Def is never a terminator here, so it is ok to increment InsertPos. - BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE), IsIndirect, LDI->second, Offset, - Variable, Expr); + BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE), + IsIndirect, LDI->second, Offset, Variable, Expr); // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only @@ -519,9 +526,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { CopyUseMI = nullptr; break; } if (CopyUseMI) { + // Use MI's debug location, which describes where Variable was + // declared, rather than whatever is attached to CopyUseMI. MachineInstr *NewMI = - BuildMI(*MF, CopyUseMI->getDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr); MachineBasicBlock::iterator Pos = CopyUseMI; EntryMBB->insertAfter(Pos, NewMI); @@ -570,6 +578,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TargetRegisterInfo::isVirtualRegister(To)) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. + + + // Replacing one register with another won't touch the kill flags. + // We need to conservatively clear the kill flags as a kill on the old + // register might dominate existing uses of the new register. + if (!MRI.use_empty(To)) + MRI.clearKillFlags(From); MRI.replaceRegWith(From, To); } @@ -591,9 +606,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, bool &HadTailCall) { - // Lower all of the non-terminator instructions. If a call is emitted - // as a tail call, cease emitting nodes for this block. Terminators - // are handled below. + // Lower the instructions. If a call is emitted as a tail call, cease emitting + // nodes for this block. for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) SDB->visit(*I); @@ -656,7 +670,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { (void)BlockNumber; bool MatchFilterBB = false; (void)MatchFilterBB; #ifndef NDEBUG - MatchFilterBB = (!FilterDAGBasicBlockName.empty() && + MatchFilterBB = (FilterDAGBasicBlockName.empty() || FilterDAGBasicBlockName == FuncInfo->MBB->getBasicBlock()->getName().str()); #endif @@ -667,8 +681,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #endif { BlockNumber = FuncInfo->MBB->getNumber(); - BlockName = MF->getName().str() + ":" + - FuncInfo->MBB->getBasicBlock()->getName().str(); + BlockName = + (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); @@ -904,9 +918,11 @@ void SelectionDAGISel::DoInstructionSelection() { /// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and /// do other setup for EH landing-pad blocks. -void SelectionDAGISel::PrepareEHLandingPad() { +bool SelectionDAGISel::PrepareEHLandingPad() { MachineBasicBlock *MBB = FuncInfo->MBB; + const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); + // Add a label to mark the beginning of the landing pad. 
Deletion of the // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->getMMI().addLandingPad(MBB); @@ -918,14 +934,53 @@ void SelectionDAGISel::PrepareEHLandingPad() { BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); + // If this is an MSVC-style personality function, we need to split the landing + // pad into several BBs. + const BasicBlock *LLVMBB = MBB->getBasicBlock(); + const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst(); + MF->getMMI().addPersonality( + MBB, cast<Function>(LPadInst->getPersonalityFn()->stripPointerCasts())); + EHPersonality Personality = MF->getMMI().getPersonalityType(); + + if (isMSVCEHPersonality(Personality)) { + SmallVector<MachineBasicBlock *, 4> ClauseBBs; + const IntrinsicInst *ActionsCall = + dyn_cast<IntrinsicInst>(LLVMBB->getFirstInsertionPt()); + // Get all invoke BBs that unwind to this landingpad. + SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(), + MBB->pred_end()); + if (ActionsCall && ActionsCall->getIntrinsicID() == Intrinsic::eh_actions) { + // If this is a call to llvm.eh.actions followed by indirectbr, then we've + // run WinEHPrepare, and we should remove this block from the machine CFG. + // Mark the targets of the indirectbr as landingpads instead. + for (const BasicBlock *LLVMSucc : successors(LLVMBB)) { + MachineBasicBlock *ClauseBB = FuncInfo->MBBMap[LLVMSucc]; + // Add the edge from the invoke to the clause. + for (MachineBasicBlock *InvokeBB : InvokeBBs) + InvokeBB->addSuccessor(ClauseBB); + + // Mark the clause as a landing pad or MI passes will delete it. + ClauseBB->setIsLandingPad(); + } + } + + // Remove the edge from the invoke to the lpad. + for (MachineBasicBlock *InvokeBB : InvokeBBs) + InvokeBB->removeSuccessor(MBB); + + // Don't select instructions for the landingpad. + return false; + } + // Mark exception register as live in. - const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); if (unsigned Reg = TLI->getExceptionPointerRegister()) FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); // Mark exception selector register as live in. if (unsigned Reg = TLI->getExceptionSelectorRegister()) FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); + + return true; } /// isFoldedOrDeadInstruction - Return true if the specified instruction is @@ -1095,8 +1150,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Setup an EH landing-pad block. FuncInfo->ExceptionPointerVirtReg = 0; FuncInfo->ExceptionSelectorVirtReg = 0; - if (FuncInfo->MBB->isLandingPad()) - PrepareEHLandingPad(); + if (LLVMBB->isLandingPad()) + if (!PrepareEHLandingPad()) + continue; // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { @@ -1111,8 +1167,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (!FastIS->lowerArguments()) { // Fast isel failed to lower these arguments ++NumFastIselFailLowerArguments; - if (EnableFastISelAbortArgs) - llvm_unreachable("FastISel didn't lower all arguments"); + if (EnableFastISelAbort > 1) + report_fatal_error("FastISel didn't lower all arguments"); // Use SelectionDAG argument lowering LowerArguments(Fn); @@ -1181,6 +1237,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { dbgs() << "FastISel missed call: "; Inst->dump(); } + if (EnableFastISelAbort > 2) + // FastISel selector couldn't handle something and bailed. + // For the purpose of debugging, just abort. 
+ report_fatal_error("FastISel didn't select the entire block"); if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) { unsigned &R = FuncInfo->ValueMap[Inst]; @@ -1208,24 +1268,24 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { continue; } - if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) { - // Don't abort, and use a different message for terminator misses. - NumFastIselFailures += NumFastIselRemaining; - if (EnableFastISelVerbose || EnableFastISelAbort) { + bool ShouldAbort = EnableFastISelAbort; + if (EnableFastISelVerbose || EnableFastISelAbort) { + if (isa<TerminatorInst>(Inst)) { + // Use a different message for terminator misses. dbgs() << "FastISel missed terminator: "; - Inst->dump(); - } - } else { - NumFastIselFailures += NumFastIselRemaining; - if (EnableFastISelVerbose || EnableFastISelAbort) { + // Don't abort unless for terminator unless the level is really high + ShouldAbort = (EnableFastISelAbort > 2); + } else { dbgs() << "FastISel miss: "; - Inst->dump(); } - if (EnableFastISelAbort) - // The "fast" selector couldn't handle something and bailed. - // For the purpose of debugging, just abort. - llvm_unreachable("FastISel didn't select the entire block"); + Inst->dump(); } + if (ShouldAbort) + // FastISel selector couldn't handle something and bailed. + // For the purpose of debugging, just abort. + report_fatal_error("FastISel didn't select the entire block"); + + NumFastIselFailures += NumFastIselRemaining; break; } @@ -1354,21 +1414,15 @@ SelectionDAGISel::FinishBasicBlock() { << FuncInfo->PHINodesToUpdate[i].first << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); - const bool MustUpdatePHINodes = SDB->SwitchCases.empty() && - SDB->JTCases.empty() && - SDB->BitTestCases.empty(); - // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. - if (MustUpdatePHINodes) { - for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); - assert(PHI->isPHI() && - "This is not a machine PHI node that we are updating!"); - if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) - continue; - PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); - } + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); + assert(PHI->isPHI() && + "This is not a machine PHI node that we are updating!"); + if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) + continue; + PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); } // Handle stack protector. @@ -1413,10 +1467,6 @@ SelectionDAGISel::FinishBasicBlock() { SDB->SPDescriptor.resetPerBBState(); } - // If we updated PHI Nodes, return early. - if (MustUpdatePHINodes) - return; - for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) { // Lower header first, if it wasn't already lowered if (!SDB->BitTestCases[i].Emitted) { @@ -1530,16 +1580,6 @@ SelectionDAGISel::FinishBasicBlock() { } SDB->JTCases.clear(); - // If the switch block involved a branch to one of the actual successors, we - // need to update PHI nodes in that block. 
- for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); - assert(PHI->isPHI() && - "This is not a machine PHI node that we are updating!"); - if (FuncInfo->MBB->isSuccessor(PHI->getParent())) - PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); - } - // If we generated any switch lowering information, build and codegen any // additional DAGs necessary. for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { @@ -1677,11 +1717,10 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, return false; } - /// SelectInlineAsmMemoryOperands - Calls to this are automatically generated /// by tblgen. Others should not call it. void SelectionDAGISel:: -SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { +SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, SDLoc DL) { std::vector<SDValue> InOps; std::swap(InOps, Ops); @@ -1704,16 +1743,30 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { } else { assert(InlineAsm::getNumOperandRegisters(Flags) == 1 && "Memory operand with multiple values?"); + + unsigned TiedToOperand; + if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) { + // We need the constraint ID from the operand this is tied to. + unsigned CurOp = InlineAsm::Op_FirstOperand; + Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); + for (; TiedToOperand; --TiedToOperand) { + CurOp += InlineAsm::getNumOperandRegisters(Flags)+1; + Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); + } + } + // Otherwise, this is a memory operand. Ask the target to select it. std::vector<SDValue> SelOps; - if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) + if (SelectInlineAsmMemoryOperand(InOps[i+1], + InlineAsm::getMemoryConstraintID(Flags), + SelOps)) report_fatal_error("Could not match memory address. Inline asm" " failure!"); // Add this to the output node. 
unsigned NewFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); - Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32)); + Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32)); Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); i += 2; } @@ -1859,11 +1912,13 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, } SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { + SDLoc DL(N); + std::vector<SDValue> Ops(N->op_begin(), N->op_end()); - SelectInlineAsmMemoryOperands(Ops); + SelectInlineAsmMemoryOperands(Ops, DL); - EVT VTs[] = { MVT::Other, MVT::Glue }; - SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), VTs, Ops); + const EVT VTs[] = {MVT::Other, MVT::Glue}; + SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops); New->setNodeId(-1); return New.getNode(); } @@ -1871,12 +1926,12 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { SDNode *SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { SDLoc dl(Op); - MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(0)); + MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0)); SDValue New = CurDAG->getCopyFromReg( - CurDAG->getEntryNode(), dl, Reg, Op->getValueType(0)); + Op->getOperand(0), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); return New.getNode(); } @@ -1889,7 +1944,7 @@ SDNode unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getOperand(2).getValueType()); SDValue New = CurDAG->getCopyToReg( - CurDAG->getEntryNode(), dl, Reg, Op->getOperand(2)); + Op->getOperand(0), dl, Reg, Op->getOperand(2)); New->setNodeId(-1); return New.getNode(); } @@ -2464,7 +2519,7 @@ public: SelectionDAG::DAGUpdateListener(DAG), RecordedNodes(RN), MatchScopes(MS) { } - void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { // Some early-returns here to avoid the search if we deleted the node or // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we // do, so it's unnecessary to update matching state at that point). 
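The tied-operand handling added to SelectInlineAsmMemoryOperands above walks the INLINEASM operand list to recover the constraint ID of the operand a memory use is tied to. A condensed sketch of that walk, assuming the usual INLINEASM layout (chain, asm string, !srcloc metadata, extra-info word, then groups of one flag word followed by its register operands); the free function itself is hypothetical:

static unsigned flagsOfTiedOperand(const std::vector<SDValue> &InOps,
                                   unsigned TiedToOperand) {
  unsigned CurOp = InlineAsm::Op_FirstOperand;
  unsigned Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
  for (; TiedToOperand; --TiedToOperand) {
    // Skip an entire operand group: the flag word plus its values.
    CurOp += InlineAsm::getNumOperandRegisters(Flags) + 1;
    Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
  }
  return Flags; // InlineAsm::getMemoryConstraintID(Flags) now applies.
}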
@@ -2885,7 +2940,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); RecordedNodes.push_back(std::pair<SDValue, SDNode*>( - CurDAG->getTargetConstant(Val, VT), nullptr)); + CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch), + VT), nullptr)); continue; } case OPC_EmitRegister: { @@ -2917,10 +2973,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (Imm->getOpcode() == ISD::Constant) { const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue(); - Imm = CurDAG->getConstant(*Val, Imm.getValueType(), true); + Imm = CurDAG->getConstant(*Val, SDLoc(NodeToMatch), Imm.getValueType(), + true); } else if (Imm->getOpcode() == ISD::ConstantFP) { const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); - Imm = CurDAG->getConstantFP(*Val, Imm.getValueType(), true); + Imm = CurDAG->getConstantFP(*Val, SDLoc(NodeToMatch), + Imm.getValueType(), true); } RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 33c20d3..2d4ab6c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" @@ -37,13 +38,19 @@ STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered"); STATISTIC(StatepointMaxSlotsRequired, "Maximum number of stack slots required for a single statepoint"); -void -StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { +static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops, + SelectionDAGBuilder &Builder, uint64_t Value) { + SDLoc L = Builder.getCurSDLoc(); + Ops.push_back(Builder.DAG.getTargetConstant(StackMaps::ConstantOp, L, + MVT::i64)); + Ops.push_back(Builder.DAG.getTargetConstant(Value, L, MVT::i64)); +} + +void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { // Consistency check assert(PendingGCRelocateCalls.empty() && "Trying to visit statepoint before finished processing previous one"); Locations.clear(); - RelocLocations.clear(); NextSlotToAllocate = 0; // Need to resize this on each safepoint - we need the two to stay in // sync and the clear patterns of a SelectionDAGBuilder have no relation @@ -53,9 +60,9 @@ StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { AllocatedStackSlots[i] = false; } } + void StatepointLoweringState::clear() { Locations.clear(); - RelocLocations.clear(); AllocatedStackSlots.clear(); assert(PendingGCRelocateCalls.empty() && "cleared before statepoint sequence completed"); @@ -222,75 +229,94 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases, /// Extract call from statepoint, lower it and return pointer to the /// call node.
Also update NodeMap so that getValue(statepoint) will /// reference lowered call result -static SDNode *lowerCallFromStatepoint(const CallInst &CI, - SelectionDAGBuilder &Builder) { - - assert(Intrinsic::experimental_gc_statepoint == - dyn_cast<IntrinsicInst>(&CI)->getIntrinsicID() && - "function called must be the statepoint function"); +static SDNode * +lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, + SelectionDAGBuilder &Builder, + SmallVectorImpl<SDValue> &PendingExports) { + + ImmutableCallSite CS(ISP.getCallSite()); + + SDValue ActualCallee = Builder.getValue(ISP.getActualCallee()); + + // Handle immediate and symbolic callees. + if (auto *ConstCallee = dyn_cast<ConstantSDNode>(ActualCallee.getNode())) + ActualCallee = Builder.DAG.getIntPtrConstant(ConstCallee->getZExtValue(), + Builder.getCurSDLoc(), + /*isTarget=*/true); + else if (auto *SymbolicCallee = + dyn_cast<GlobalAddressSDNode>(ActualCallee.getNode())) + ActualCallee = Builder.DAG.getTargetGlobalAddress( + SymbolicCallee->getGlobal(), SDLoc(SymbolicCallee), + SymbolicCallee->getValueType(0)); + + assert(CS.getCallingConv() != CallingConv::AnyReg && + "anyregcc is not supported on statepoints!"); + + Type *DefTy = ISP.getActualReturnType(); + bool HasDef = !DefTy->isVoidTy(); + + SDValue ReturnValue, CallEndVal; + std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands( + ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos, + ISP.getNumCallArgs(), ActualCallee, DefTy, LandingPad, + false /* IsPatchPoint */); + + SDNode *CallEnd = CallEndVal.getNode(); + + // Get a call instruction from the call sequence chain. Tail calls are not + // allowed. The following code is essentially reverse engineering X86's + // LowerCallTo. + // + // We are expecting DAG to have the following form: + // + // ch = eh_label (only in case of invoke statepoint) + // ch, glue = callseq_start ch + // ch, glue = X86::Call ch, glue + // ch, glue = callseq_end ch, glue + // get_return_value ch, glue + // + // get_return_value can either be a CopyFromReg to grab the return value from + // %RAX, or it can be a LOAD to load a value returned by reference via a stack + // slot. + + if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg || + CallEnd->getOpcode() == ISD::LOAD)) + CallEnd = CallEnd->getOperand(0).getNode(); + + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!"); - ImmutableStatepoint StatepointOperands(&CI); - - // Lower the actual call itself - This is a bit of a hack, but we want to - // avoid modifying the actual lowering code. This is similiar in intent to - // the LowerCallOperands mechanism used by PATCHPOINT, but is structured - // differently. Hopefully, this is slightly more robust w.r.t. calling - // convention, return values, and other function attributes. - Value *ActualCallee = const_cast<Value *>(StatepointOperands.actualCallee()); - - std::vector<Value *> Args; - CallInst::const_op_iterator arg_begin = StatepointOperands.call_args_begin(); - CallInst::const_op_iterator arg_end = StatepointOperands.call_args_end(); - Args.insert(Args.end(), arg_begin, arg_end); - // TODO: remove the creation of a new instruction! We should not be - // modifying the IR (even temporarily) at this point. 
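A condensed sketch of the walk implied by the node-shape comment above: peel the optional return-value fetch, then expect the CALLSEQ_END of the call sequence (the helper name is hypothetical; the hunk inlines this logic):

static SDNode *findCallSeqEnd(SDValue CallEndVal, bool HasDef) {
  SDNode *N = CallEndVal.getNode();
  // Step over the node that fetches the return value, if any: a CopyFromReg
  // from the return register or a LOAD from a by-reference stack slot.
  if (HasDef && (N->getOpcode() == ISD::CopyFromReg ||
                 N->getOpcode() == ISD::LOAD))
    N = N->getOperand(0).getNode();
  assert(N->getOpcode() == ISD::CALLSEQ_END && "unexpected call sequence");
  return N;
}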
- CallInst *Tmp = CallInst::Create(ActualCallee, Args); - Tmp->setTailCall(CI.isTailCall()); - Tmp->setCallingConv(CI.getCallingConv()); - Tmp->setAttributes(CI.getAttributes()); - Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false); - - // Handle the return value of the call iff any. - const bool HasDef = !Tmp->getType()->isVoidTy(); if (HasDef) { - // The value of the statepoint itself will be the value of call itself. - // We'll replace the actually call node shortly. gc_result will grab - // this value. - Builder.setValue(&CI, Builder.getValue(Tmp)); + if (CS.isInvoke()) { + // The result value will be used in a different basic block for invokes, + // so we need to export it now. But the statepoint call has a different + // type than the actual call, which means the standard exporting mechanism + // would create a register of the wrong type. So instead we need to create + // a register with the correct type and save the value into it manually. + // TODO: To eliminate this problem we can remove gc.result intrinsics + // completely and make the statepoint call return a tuple. + unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType()); + RegsForValue RFV(*Builder.DAG.getContext(), + Builder.DAG.getTargetLoweringInfo(), Reg, + ISP.getActualReturnType()); + SDValue Chain = Builder.DAG.getEntryNode(); + + RFV.getCopyToRegs(ReturnValue, Builder.DAG, Builder.getCurSDLoc(), Chain, + nullptr); + PendingExports.push_back(Chain); + Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg; + } else { + // The value of the statepoint itself will be the value of the call + // itself. We'll replace the actual call node shortly. gc_result will + // grab this value. + Builder.setValue(CS.getInstruction(), ReturnValue); + } } else { // The token value is never used from here on, just generate a poison value - Builder.setValue(&CI, Builder.DAG.getIntPtrConstant(-1)); - } - // Remove the fake entry we created so we don't have a hanging reference - // after we delete this node. - Builder.removeValue(Tmp); - delete Tmp; - Tmp = nullptr; - - // Search for the call node - // The following code is essentially reverse engineering X86's - // LowerCallTo.
- SDNode *CallNode = nullptr; - - // We just emitted a call, so it should be last thing generated - SDValue Chain = Builder.DAG.getRoot(); - - // Find closest CALLSEQ_END walking back through lowered nodes if needed - SDNode *CallEnd = Chain.getNode(); - int Sanity = 0; - while (CallEnd->getOpcode() != ISD::CALLSEQ_END) { - CallEnd = CallEnd->getGluedNode(); - assert(CallEnd && "Can not find call node"); - assert(Sanity < 20 && "should have found call end already"); - Sanity++; + Builder.setValue(CS.getInstruction(), + Builder.DAG.getIntPtrConstant(-1, Builder.getCurSDLoc())); } - assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && - "Expected a callseq node."); - assert(CallEnd->getGluedNode()); - // Step back inside the CALLSEQ - CallNode = CallEnd->getGluedNode(); - return CallNode; + return CallEnd->getOperand(0).getNode(); } /// Collect all gc pointers coming into statepoint intrinsic, clean them up, @@ -300,24 +326,15 @@ static SDNode *lowerCallFromStatepoint(const CallInst &CI, /// Relocs - the gc_relocate corresponding to each base/ptr pair /// Elements of these arrays should be in one-to-one correspondence with each /// other, i.e. Bases[i], Ptrs[i] are from the same gcrelocate call -static void -getIncomingStatepointGCValues(SmallVectorImpl<const Value *> &Bases, - SmallVectorImpl<const Value *> &Ptrs, - SmallVectorImpl<const Value *> &Relocs, - ImmutableCallSite Statepoint, - SelectionDAGBuilder &Builder) { - // Search for relocated pointers. Note that working backwards from the - // gc_relocates ensures that we only get pairs which are actually relocated - // and used after the statepoint. - // TODO: This logic should probably become a utility function in Statepoint.h - for (const User *U : cast<CallInst>(Statepoint.getInstruction())->users()) { - if (!isGCRelocate(U)) { - continue; - } - GCRelocateOperands relocateOpers(U); - Relocs.push_back(cast<Value>(U)); - Bases.push_back(relocateOpers.basePtr()); - Ptrs.push_back(relocateOpers.derivedPtr()); +static void getIncomingStatepointGCValues( + SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Ptrs, + SmallVectorImpl<const Value *> &Relocs, ImmutableStatepoint StatepointSite, + SelectionDAGBuilder &Builder) { + for (GCRelocateOperands relocateOpers : + StatepointSite.getRelocates(StatepointSite)) { + Relocs.push_back(relocateOpers.getUnderlyingCallSite().getInstruction()); + Bases.push_back(relocateOpers.getBasePtr()); + Ptrs.push_back(relocateOpers.getDerivedPtr()); } // Remove any redundant llvm::Values which map to the same SDValue as another @@ -376,14 +393,13 @@ static void lowerIncomingStatepointValue(SDValue Incoming, // such in the stackmap. This is required so that the consumer can // parse any internal format to the deopt state. It also handles null // pointers and other constant pointers in GC states - Ops.push_back( - Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); - Ops.push_back(Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + pushStackMapConstant(Ops, Builder, C->getSExtValue()); } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { - // This handles allocas as arguments to the statepoint - const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); - Ops.push_back( - Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); + // This handles allocas as arguments to the statepoint (this is only + // really meaningful for a deopt value. For GC, we'd be trying to + // relocate the address of the alloca itself?)
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), + Incoming.getValueType())); } else { // Otherwise, locate a spill slot and explicitly spill it so it // can be found by the runtime later. We currently do not support @@ -408,15 +424,15 @@ static void lowerIncomingStatepointValue(SDValue Incoming, /// statepoint. The chain nodes will have already been created and the DAG root /// will be set to the last value spilled (if any were). static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, - ImmutableStatepoint Statepoint, + ImmutableStatepoint StatepointSite, SelectionDAGBuilder &Builder) { // Lower the deopt and gc arguments for this statepoint. Layout will // be: deopt argument length, deopt arguments.., gc arguments... SmallVector<const Value *, 64> Bases, Ptrs, Relocations; - getIncomingStatepointGCValues(Bases, Ptrs, Relocations, - Statepoint.getCallSite(), Builder); + getIncomingStatepointGCValues(Bases, Ptrs, Relocations, StatepointSite, + Builder); #ifndef NDEBUG // Check that each of the gc pointer and bases we've gotten out of the @@ -424,61 +440,54 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // heap. This is basically just here to help catch errors during statepoint // insertion. TODO: This should actually be in the Verifier, but we can't get // to the GCStrategy from there (yet). - if (Builder.GFI) { - GCStrategy &S = Builder.GFI->getStrategy(); - for (const Value *V : Bases) { - auto Opt = S.isGCManagedPointer(V); - if (Opt.hasValue()) { - assert(Opt.getValue() && - "non gc managed base pointer found in statepoint"); - } + GCStrategy &S = Builder.GFI->getStrategy(); + for (const Value *V : Bases) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed base pointer found in statepoint"); } - for (const Value *V : Ptrs) { - auto Opt = S.isGCManagedPointer(V); - if (Opt.hasValue()) { - assert(Opt.getValue() && - "non gc managed derived pointer found in statepoint"); - } + } + for (const Value *V : Ptrs) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed derived pointer found in statepoint"); } - for (const Value *V : Relocations) { - auto Opt = S.isGCManagedPointer(V); - if (Opt.hasValue()) { - assert(Opt.getValue() && "non gc managed pointer relocated"); - } + } + for (const Value *V : Relocations) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && "non gc managed pointer relocated"); } } #endif - - // Before we actually start lowering (and allocating spill slots for values), // reserve any stack slots which we judge to be profitable to reuse for a // particular value. This is purely an optimization over the code below and // doesn't change semantics at all. It is important for performance that we // reserve slots for both deopt and gc values before lowering either. - for (auto I = Statepoint.vm_state_begin() + 1, E = Statepoint.vm_state_end(); - I != E; ++I) { - Value *V = *I; + for (const Value *V : StatepointSite.vm_state_args()) { SDValue Incoming = Builder.getValue(V); reservePreviousStackSlotForValue(Incoming, Builder); } - for (unsigned i = 0; i < Bases.size() * 2; ++i) { - // Even elements will contain base, odd elements - derived ptr - const Value *V = i % 2 ? 
Bases[i / 2] : Ptrs[i / 2]; - SDValue Incoming = Builder.getValue(V); - reservePreviousStackSlotForValue(Incoming, Builder); + for (unsigned i = 0; i < Bases.size(); ++i) { + const Value *Base = Bases[i]; + reservePreviousStackSlotForValue(Builder.getValue(Base), Builder); + + const Value *Ptr = Ptrs[i]; + reservePreviousStackSlotForValue(Builder.getValue(Ptr), Builder); } // First, prefix the list with the number of unique values to be // lowered. Note that this is the number of *Values* not the // number of SDValues required to lower them. - const int NumVMSArgs = Statepoint.numTotalVMSArgs(); - Ops.push_back( - Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); - Ops.push_back(Builder.DAG.getTargetConstant(NumVMSArgs, MVT::i64)); + const int NumVMSArgs = StatepointSite.getNumTotalVMSArgs(); + pushStackMapConstant(Ops, Builder, NumVMSArgs); - assert(NumVMSArgs + 1 == std::distance(Statepoint.vm_state_begin(), - Statepoint.vm_state_end())); + assert(NumVMSArgs == std::distance(StatepointSite.vm_state_begin(), + StatepointSite.vm_state_end())); // The vm state arguments are lowered in an opaque manner. We do // not know what type of values are contained within. We skip the @@ -486,9 +495,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // explicitly just above. We could have left it in the loop and // not done it explicitly, but it's far easier to understand this // way. - for (auto I = Statepoint.vm_state_begin() + 1, E = Statepoint.vm_state_end(); - I != E; ++I) { - const Value *V = *I; + for (const Value *V : StatepointSite.vm_state_args()) { SDValue Incoming = Builder.getValue(V); lowerIncomingStatepointValue(Incoming, Ops, Builder); } @@ -498,35 +505,96 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // arrays interwoven with each (lowered) base pointer immediately followed by // its (lowered) derived pointer. i.e. // (base[0], ptr[0], base[1], ptr[1], ...) - for (unsigned i = 0; i < Bases.size() * 2; ++i) { - // Even elements will contain base, odd elements - derived ptr - const Value *V = i % 2 ? Bases[i / 2] : Ptrs[i / 2]; + for (unsigned i = 0; i < Bases.size(); ++i) { + const Value *Base = Bases[i]; + lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder); + + const Value *Ptr = Ptrs[i]; + lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder); + } + + // If there are any explicit spill slots passed to the statepoint, record + // them, but otherwise do not do anything special. These are user provided + // allocas and give control over placement to the consumer. In this case, + // it is the contents of the slot which may get updated, not the pointer to + // the alloca + for (Value *V : StatepointSite.gc_args()) { SDValue Incoming = Builder.getValue(V); - lowerIncomingStatepointValue(Incoming, Ops, Builder); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { + // This handles allocas as arguments to the statepoint + Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), + Incoming.getValueType())); + } + } + + // Record computed locations for all lowered values. + // This can not be embedded in lowering loops as we need to record *all* + // values, while previous loops account only for values with unique SDValues.
+ const Instruction *StatepointInstr = + StatepointSite.getCallSite().getInstruction(); + FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = + Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr]; + + for (GCRelocateOperands RelocateOpers : + StatepointSite.getRelocates(StatepointSite)) { + const Value *V = RelocateOpers.getDerivedPtr(); + SDValue SDV = Builder.getValue(V); + SDValue Loc = Builder.StatepointLowering.getLocation(SDV); + + if (Loc.getNode()) { + SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); + } else { + // Record the value as visited, but not spilled. This is the case for + // allocas and constants. For these values we can avoid emitting a spill + // load while visiting the corresponding gc_relocate. + // Actually we do not need to record them in this map at all. + // We do this only to check that we are not relocating any unvisited value. + SpillMap[V] = None; + + // Default llvm mechanisms for exporting values which are used in + // different basic blocks do not work for gc relocates. + // Note that it would be incorrect to teach llvm that all relocates are + // uses of the corresponding values so that it would automatically + // export them. Relocates of the spilled values do not use the original + // value. + if (StatepointSite.getCallSite().isInvoke()) + Builder.ExportFromCurrentBlock(V); + } } } + void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { + // Check some preconditions for sanity + assert(isStatepoint(&CI) && + "function called must be the statepoint function"); + + LowerStatepoint(ImmutableStatepoint(&CI)); +} + +void SelectionDAGBuilder::LowerStatepoint( + ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) { // The basic scheme here is that information about both the original call and // the safepoint is encoded in the CallInst. We create a temporary call and // lower it, then reverse engineer the calling sequence. - // Check some preconditions for sanity - assert(isStatepoint(&CI) && - "function called must be the statepoint function"); NumOfStatepoints++; // Clear state StatepointLowering.startNewStatepoint(*this); + ImmutableCallSite CS(ISP.getCallSite()); + #ifndef NDEBUG - // Consistency check - for (const User *U : CI.users()) { - const CallInst *Call = cast<CallInst>(U); - if (isGCRelocate(Call)) - StatepointLowering.scheduleRelocCall(*Call); + // Consistency check. Don't do this for invokes. It would be too + // expensive to preserve this information across different basic blocks. + if (!CS.isInvoke()) { + for (const User *U : CS->users()) { + const CallInst *Call = cast<CallInst>(U); + if (isGCRelocate(Call)) + StatepointLowering.scheduleRelocCall(*Call); + } } #endif - ImmutableStatepoint ISP(&CI); #ifndef NDEBUG // If this is a malformed statepoint, report it early to simplify debugging. // This should catch any IR level mistake that's made when constructing or @@ -534,42 +602,82 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { ISP.verify(); // Check that the associated GCStrategy expects to encounter statepoints. - // TODO: This if should become an assert. For now, we allow the GCStrategy - // to be optional for backwards compatibility. This will only last a short - // period (i.e. a couple of weeks).
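The SpillMap populated above is keyed by the derived pointer and, assuming StatepointSpilledValueMapTy is a DenseMap<const Value *, Optional<int>>, records either the frame index of the spill slot or None. A sketch of the consuming side, which visitGCRelocate further below implements in full:

Optional<int> Slot = SpillMap.lookup(DerivedPtr);
if (Slot) {
  // Spilled: reload the relocated value from frame index *Slot.
} else {
  // Constant or alloca: never spilled, reuse the original SDValue.
}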
- if (GFI) { - assert(GFI->getStrategy().useStatepoints() && - "GCStrategy does not expect to encounter statepoints"); - } + assert(GFI->getStrategy().useStatepoints() && + "GCStrategy does not expect to encounter statepoints"); #endif - // Lower statepoint vmstate and gcstate arguments - SmallVector<SDValue, 10> LoweredArgs; - lowerStatepointMetaArgs(LoweredArgs, ISP, *this); + SmallVector<SDValue, 10> LoweredMetaArgs; + lowerStatepointMetaArgs(LoweredMetaArgs, ISP, *this); // Get call node, we will replace it later with statepoint - SDNode *CallNode = lowerCallFromStatepoint(CI, *this); + SDNode *CallNode = + lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports); - // Construct the actual STATEPOINT node with all the appropriate arguments - // and return values. + // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END + // nodes with all the appropriate arguments and return values. + + // Call Node: Chain, Target, {Args}, RegMask, [Glue] + SDValue Chain = CallNode->getOperand(0); + + SDValue Glue; + bool CallHasIncomingGlue = CallNode->getGluedNode(); + if (CallHasIncomingGlue) { + // Glue is always last operand + Glue = CallNode->getOperand(CallNode->getNumOperands() - 1); + } + + // Build the GC_TRANSITION_START node if necessary. + // + // The operands to the GC_TRANSITION_{START,END} nodes are laid out in the + // order in which they appear in the call to the statepoint intrinsic. If + // any of the operands is pointer-typed, that operand is immediately + // followed by a SRCVALUE for the pointer that may be used during lowering + // (e.g. to form MachinePointerInfo values for loads/stores). + const bool IsGCTransition = + (ISP.getFlags() & (uint64_t)StatepointFlags::GCTransition) == + (uint64_t)StatepointFlags::GCTransition; + if (IsGCTransition) { + SmallVector<SDValue, 8> TSOps; + + // Add chain + TSOps.push_back(Chain); + + // Add GC transition arguments + for (const Value *V : ISP.gc_transition_args()) { + TSOps.push_back(getValue(V)); + if (V->getType()->isPointerTy()) + TSOps.push_back(DAG.getSrcValue(V)); + } + + // Add glue if necessary + if (CallHasIncomingGlue) + TSOps.push_back(Glue); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + SDValue GCTransitionStart = + DAG.getNode(ISD::GC_TRANSITION_START, getCurSDLoc(), NodeTys, TSOps); + + Chain = GCTransitionStart.getValue(0); + Glue = GCTransitionStart.getValue(1); + } // TODO: Currently, all of these operands are being marked as read/write in // PrologEpilogInserter.cpp, we should special case the VMState arguments // and flags to be read-only. SmallVector<SDValue, 40> Ops; + // Add the <id> and <numBytes> constants. + Ops.push_back(DAG.getTargetConstant(ISP.getID(), getCurSDLoc(), MVT::i64)); + Ops.push_back( + DAG.getTargetConstant(ISP.getNumPatchBytes(), getCurSDLoc(), MVT::i32)); + // Calculate and push starting position of vmstate arguments - // Call Node: Chain, Target, {Args}, RegMask, [Glue] - SDValue Glue; - if (CallNode->getGluedNode()) { - // Glue is always last operand - Glue = CallNode->getOperand(CallNode->getNumOperands() - 1); - } // Get number of arguments incoming directly into call node unsigned NumCallRegArgs = - CallNode->getNumOperands() - (Glue.getNode() ? 4 : 3); - Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + CallNode->getNumOperands() - (CallHasIncomingGlue ?
4 : 3); + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, getCurSDLoc(), MVT::i32)); // Add call target SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0); @@ -578,47 +686,74 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { // Add call arguments // Get position of register mask in the call SDNode::op_iterator RegMaskIt; - if (Glue.getNode()) + if (CallHasIncomingGlue) RegMaskIt = CallNode->op_end() - 2; else RegMaskIt = CallNode->op_end() - 1; Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt); - // Add a leading constant argument with the Flags and the calling convention - // masked together - CallingConv::ID CallConv = CI.getCallingConv(); - int Flags = dyn_cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue(); - assert(Flags == 0 && "not expected to be used"); - Ops.push_back(DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); - Ops.push_back( - DAG.getTargetConstant(Flags | ((unsigned)CallConv << 1), MVT::i64)); + // Add a constant argument for the calling convention + pushStackMapConstant(Ops, *this, CS.getCallingConv()); + + // Add a constant argument for the flags + uint64_t Flags = ISP.getFlags(); + assert( + ((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) + && "unknown flag used"); + pushStackMapConstant(Ops, *this, Flags); // Insert all vmstate and gcstate arguments - Ops.insert(Ops.end(), LoweredArgs.begin(), LoweredArgs.end()); + Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end()); // Add register mask from call node Ops.push_back(*RegMaskIt); // Add chain - Ops.push_back(CallNode->getOperand(0)); + Ops.push_back(Chain); // Same for the glue, but we add it only if original call had it if (Glue.getNode()) Ops.push_back(Glue); - // Compute return values - SmallVector<EVT, 21> ValueVTs; - ValueVTs.push_back(MVT::Other); - ValueVTs.push_back(MVT::Glue); // provide a glue output since we consume one - // as input. This allows someone else to chain - // off us as needed. - SDVTList NodeTys = DAG.getVTList(ValueVTs); + // Compute return values. Provide a glue output since we consume one as + // input. This allows someone else to chain off us as needed. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + SDNode *StatepointMCNode = + DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops); + + SDNode *SinkNode = StatepointMCNode; - SDNode *StatepointMCNode = DAG.getMachineNode(TargetOpcode::STATEPOINT, - getCurSDLoc(), NodeTys, Ops); + // Build the GC_TRANSITION_END node if necessary. + // + // See the comment above regarding GC_TRANSITION_START for the layout of + // the operands to the GC_TRANSITION_END node. 
+ if (IsGCTransition) { + SmallVector<SDValue, 8> TEOps; + + // Add chain + TEOps.push_back(SDValue(StatepointMCNode, 0)); + + // Add GC transition arguments + for (const Value *V : ISP.gc_transition_args()) { + TEOps.push_back(getValue(V)); + if (V->getType()->isPointerTy()) + TEOps.push_back(DAG.getSrcValue(V)); + } + + // Add glue + TEOps.push_back(SDValue(StatepointMCNode, 1)); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + SDValue GCTransitionStart = + DAG.getNode(ISD::GC_TRANSITION_END, getCurSDLoc(), NodeTys, TEOps); + + SinkNode = GCTransitionStart.getNode(); + } // Replace original call - DAG.ReplaceAllUsesWith(CallNode, StatepointMCNode); // This may update Root + DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root // Remove original call node DAG.DeleteNode(CallNode); @@ -636,49 +771,72 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) { // The result value of the gc_result is simply the result of the actual // call. We've already emitted this, so just grab the value. Instruction *I = cast<Instruction>(CI.getArgOperand(0)); - assert(isStatepoint(I) && - "first argument must be a statepoint token"); - - setValue(&CI, getValue(I)); + assert(isStatepoint(I) && "first argument must be a statepoint token"); + + if (isa<InvokeInst>(I)) { + // For invokes we should have stored the call result in a virtual register. + // We cannot use the default getValue() functionality to copy a value from + // this register because the statepoint and the actual call return types + // can be different, and getValue() will use CopyFromReg of the wrong type, + // which is always i32 in our case. + PointerType *CalleeType = + cast<PointerType>(ImmutableStatepoint(I).getActualCallee()->getType()); + Type *RetTy = + cast<FunctionType>(CalleeType->getElementType())->getReturnType(); + SDValue CopyFromReg = getCopyFromRegs(I, RetTy); + + assert(CopyFromReg.getNode()); + setValue(&CI, CopyFromReg); + } else { + setValue(&CI, getValue(I)); + } } void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { + GCRelocateOperands RelocateOpers(&CI); + #ifndef NDEBUG // Consistency check - StatepointLowering.relocCallVisited(CI); + // We skip this check for invoke statepoints. It would be too expensive to + // preserve validation info through different basic blocks. + if (!RelocateOpers.isTiedToInvoke()) { + StatepointLowering.relocCallVisited(CI); + } #endif - GCRelocateOperands relocateOpers(&CI); - SDValue SD = getValue(relocateOpers.derivedPtr()); + const Value *DerivedPtr = RelocateOpers.getDerivedPtr(); + SDValue SD = getValue(DerivedPtr); + + FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = + FuncInfo.StatepointRelocatedValues[RelocateOpers.getStatepoint()]; - if (isa<ConstantSDNode>(SD) || isa<FrameIndexSDNode>(SD)) { - // We didn't need to spill these special cases (constants and allocas). - // See the handling in spillIncomingValueForStatepoint for detail. + // We should have recorded a location for this pointer + assert(SpillMap.count(DerivedPtr) && "Relocating not lowered gc value"); + Optional<int> DerivedPtrLocation = SpillMap[DerivedPtr]; + + // We didn't need to spill these special cases (constants and allocas). + // See the handling in spillIncomingValueForStatepoint for detail.
+ if (!DerivedPtrLocation) { setValue(&CI, SD); return; } - SDValue Loc = StatepointLowering.getRelocLocation(SD); - // Emit new load if we did not emit it before - if (!Loc.getNode()) { - SDValue SpillSlot = StatepointLowering.getLocation(SD); - int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + SDValue SpillSlot = DAG.getTargetFrameIndex(*DerivedPtrLocation, + SD.getValueType()); - // Be conservative: flush all pending loads - // TODO: Probably we can be less restrictive on this, - // it may allow more scheduling opprtunities - SDValue Chain = getRoot(); + // Be conservative: flush all pending loads + // TODO: Probably we can be less restrictive on this, + // it may allow more scheduling opportunities + SDValue Chain = getRoot(); - Loc = DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, - SpillSlot, MachinePointerInfo::getFixedStack(FI), false, - false, false, 0); + SDValue SpillLoad = + DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, + MachinePointerInfo::getFixedStack(*DerivedPtrLocation), + false, false, false, 0); - StatepointLowering.setRelocLocation(SD, Loc); + // Again, be conservative, don't emit pending loads + DAG.setRoot(SpillLoad.getValue(1)); - // Again, be conservative, don't emit pending loads - DAG.setRoot(Loc.getValue(1)); - } - - assert(Loc.getNode()); - setValue(&CI, Loc); + assert(SpillLoad.getNode()); + setValue(&CI, SpillLoad); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h index 673112c..82d0c62 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -30,8 +30,7 @@ class SelectionDAGBuilder; /// works in concert with information in FunctionLoweringInfo. class StatepointLoweringState { public: - StatepointLoweringState() : NextSlotToAllocate(0) { - } + StatepointLoweringState() : NextSlotToAllocate(0) {} /// Reset all state tracking for a newly encountered safepoint. Also /// performs some consistency checking. @@ -57,25 +56,6 @@ public: Locations[val] = Location; } - /// Returns the relocated value for a given input pointer. Will - /// return SDValue() if this value hasn't yet been reloaded from - /// it's stack slot after the statepoint. Otherwise, the value - /// has already been reloaded and the SDValue of that reload will - /// be returned. Note that VMState values are spilled but not - /// reloaded (since they don't change at the safepoint unless - /// also listed in the GC pointer section) and will thus never - /// be in this map - SDValue getRelocLocation(SDValue val) { - if (!RelocLocations.count(val)) - return SDValue(); - return RelocLocations[val]; - } - void setRelocLocation(SDValue val, SDValue Location) { - assert(!RelocLocations.count(val) && - "Trying to allocate already allocated location"); - RelocLocations[val] = Location; - } - /// Record the fact that we expect to encounter a given gc_relocate /// before the next statepoint. If we don't see it, we'll report /// an assertion. @@ -118,8 +98,6 @@ private: /// Maps pre-relocation value (gc pointer directly incoming into statepoint) /// into its location (currently only stack slots) DenseMap<SDValue, SDValue> Locations; - /// Map pre-relocated value into it's new relocated location - DenseMap<SDValue, SDValue> RelocLocations; /// A boolean indicator for each slot listed in the FunctionInfo as to /// whether it has been used in the current statepoint.
Since we try to diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f12c035..833da4b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -100,6 +100,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); Args.push_back(Entry); } + if (LC == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported library call operation!"); SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); @@ -201,7 +203,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue Ops[2] = { NewLHS, NewRHS }; NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl).first; - NewRHS = DAG.getConstant(0, RetVT); + NewRHS = DAG.getConstant(0, dl, RetVT); CCCode = getCmpLibcallCC(LC1); if (LC2 != RTLIB::UNKNOWN_LIBCALL) { SDValue Tmp = DAG.getNode(ISD::SETCC, dl, @@ -303,7 +305,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), DAG.getConstant(Demanded & C->getAPIntValue(), - VT)); + dl, VT)); return CombineTo(Op, New); } @@ -447,7 +449,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, Op.getOperand(1)); // If all of the demanded bits in the inputs are known zeros, return zero. if ((NewMask & (KnownZero|KnownZero2)) == NewMask) - return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType())); + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); // If the RHS is a constant, see if we can simplify it. 
if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) return true; @@ -535,7 +537,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side if (KnownOne == KnownOne2) { // set bits are the same on both sides EVT VT = Op.getValueType(); - SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); + SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, dl, VT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), ANDC)); } @@ -551,7 +553,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Expanded != C->getAPIntValue()) { EVT VT = Op.getValueType(); SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), - TLO.DAG.getConstant(Expanded, VT)); + TLO.DAG.getConstant(Expanded, dl, VT)); return TLO.CombineTo(Op, New); } // if it already has all the bits set, nothing to change @@ -624,7 +626,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } SDValue NewSA = - TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); EVT VT = Op.getValueType(); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); @@ -648,7 +650,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, ShTy = InnerVT; SDValue NarrowShl = TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, - TLO.DAG.getConstant(ShAmt, ShTy)); + TLO.DAG.getConstant(ShAmt, dl, ShTy)); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), @@ -670,7 +672,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 && NewMask.trunc(ShAmt) == 0) { SDValue NewSA = - TLO.DAG.getConstant(ShAmt - InnerShAmt, + TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, Op.getOperand(1).getValueType()); EVT VT = Op.getValueType(); SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, @@ -713,7 +715,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } SDValue NewSA = - TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); } @@ -778,7 +780,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Log2 >= 0) { // The bit must come from the sign. SDValue NewSA = - TLO.DAG.getConstant(BitWidth - 1 - Log2, + TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op.getOperand(1).getValueType()); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), NewSA)); @@ -794,19 +796,27 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); // If we only care about the highest bit, don't bother shifting right. - if (MsbMask == DemandedMask) { + if (MsbMask == NewMask) { unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); SDValue InOp = Op.getOperand(0); - - // Compute the correct shift amount type, which must be getShiftAmountTy - // for scalar types after legalization. 
- EVT ShiftAmtTy = Op.getValueType(); - if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) - ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); - - SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, - Op.getValueType(), InOp, ShiftAmt)); + unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits(); + bool AlreadySignExtended = + TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1; + // However if the input is already sign extended we expect the sign + // extension to be dropped altogether later and do not simplify. + if (!AlreadySignExtended) { + // Compute the correct shift amount type, which must be getShiftAmountTy + // for scalar types after legalization. + EVT ShiftAmtTy = Op.getValueType(); + if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); + + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl, + ShiftAmtTy); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), InOp, + ShiftAmt)); + } } // Sign extension. Compute the demanded bits in the result that are not @@ -985,7 +995,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.LegalTypes()) { uint64_t ShVal = ShAmt->getZExtValue(); Shift = - TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType())); + TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(Op.getValueType())); } APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, @@ -1043,7 +1053,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (!OpVTLegal && OpVTSizeInBits > 32) Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); unsigned ShVal = Op.getValueType().getSizeInBits()-1; - SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType()); + SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType()); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, Op.getValueType(), Sign, ShAmt)); @@ -1076,8 +1086,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If we know the value of all of the demanded bits, return this as a // constant. - if ((NewMask & (KnownZero|KnownOne)) == NewMask) - return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { + // Avoid folding to a constant if any OpaqueConstant is involved. + const SDNode *N = Op.getNode(); + for (SDNodeIterator I = SDNodeIterator::begin(N), + E = SDNodeIterator::end(N); I != E; ++I) { + SDNode *Op = *I; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (C->isOpaque()) + return false; + } + return TLO.CombineTo(Op, + TLO.DAG.getConstant(KnownOne, dl, Op.getValueType())); + } return false; } @@ -1213,13 +1234,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, switch (Cond) { default: break; case ISD::SETFALSE: - case ISD::SETFALSE2: return DAG.getConstant(0, VT); + case ISD::SETFALSE2: return DAG.getConstant(0, dl, VT); case ISD::SETTRUE: case ISD::SETTRUE2: { TargetLowering::BooleanContent Cnt = getBooleanContents(N0->getValueType(0)); return DAG.getConstant( - Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? 
-1ULL : 1, dl, + VT); } } @@ -1253,7 +1275,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (srl (ctlz x), 5) == 1 -> X == 0 Cond = ISD::SETEQ; } - SDValue Zero = DAG.getConstant(0, N0.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero, Cond); } @@ -1274,10 +1296,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (ctpop x) u> 1 -> (x & x-1) != 0 if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, - DAG.getConstant(1, CTVT)); + DAG.getConstant(1, dl, CTVT)); SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; - return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC); + return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC); } // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. @@ -1331,7 +1353,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { // Will get folded away. SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt); - SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT); + SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT); return DAG.getSetCC(dl, VT, Trunc, C, Cond); } } @@ -1382,7 +1404,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue Ptr = Lod->getBasePtr(); if (bestOffset != 0) Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), - DAG.getConstant(bestOffset, PtrType)); + DAG.getConstant(bestOffset, dl, PtrType)); unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, Lod->getPointerInfo().getWithOffset(bestOffset), @@ -1390,8 +1412,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), - newVT)), - DAG.getConstant(0LL, newVT), Cond); + dl, newVT)), + DAG.getConstant(0LL, dl, newVT), Cond); } } } @@ -1407,18 +1429,18 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, switch (Cond) { case ISD::SETUGT: case ISD::SETUGE: - case ISD::SETEQ: return DAG.getConstant(0, VT); + case ISD::SETEQ: return DAG.getConstant(0, dl, VT); case ISD::SETULT: case ISD::SETULE: - case ISD::SETNE: return DAG.getConstant(1, VT); + case ISD::SETNE: return DAG.getConstant(1, dl, VT); case ISD::SETGT: case ISD::SETGE: // True if the sign bit of C1 is set. - return DAG.getConstant(C1.isNegative(), VT); + return DAG.getConstant(C1.isNegative(), dl, VT); case ISD::SETLT: case ISD::SETLE: // True if the sign bit of C1 isn't set. 
- return DAG.getConstant(C1.isNonNegative(), VT); + return DAG.getConstant(C1.isNonNegative(), dl, VT); default: break; } @@ -1437,7 +1459,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (isOperationLegal(ISD::SETCC, newVT) && getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) { EVT NewSetCCVT = getSetCCResultType(*DAG.getContext(), newVT); - SDValue NewConst = DAG.getConstant(C1.trunc(InSize), newVT); + SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), NewConst, Cond); @@ -1458,7 +1480,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If the constant doesn't fit into the number of bits for the source of // the sign extension, it is impossible for both sides to be equal. if (C1.getMinSignedBits() > ExtSrcTyBits) - return DAG.getConstant(Cond == ISD::SETNE, VT); + return DAG.getConstant(Cond == ISD::SETNE, dl, VT); SDValue ZextOp; EVT Op0Ty = N0.getOperand(0).getValueType(); @@ -1467,7 +1489,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } else { APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), - DAG.getConstant(Imm, Op0Ty)); + DAG.getConstant(Imm, dl, Op0Ty)); } if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(ZextOp.getNode()); @@ -1476,7 +1498,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getConstant(C1 & APInt::getLowBitsSet( ExtDstTyBits, ExtSrcTyBits), - ExtDstTy), + dl, ExtDstTy), Cond); } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { @@ -1546,20 +1568,20 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Op0.getValueType().bitsGT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)), - DAG.getConstant(1, VT)); + DAG.getConstant(1, dl, VT)); else if (Op0.getValueType().bitsLT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)), - DAG.getConstant(1, VT)); + DAG.getConstant(1, dl, VT)); return DAG.getSetCC(dl, VT, Op0, - DAG.getConstant(0, Op0.getValueType()), + DAG.getConstant(0, dl, Op0.getValueType()), Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } if (Op0.getOpcode() == ISD::AssertZext && cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1) return DAG.getSetCC(dl, VT, Op0, - DAG.getConstant(0, Op0.getValueType()), + DAG.getConstant(0, dl, Op0.getValueType()), Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } } @@ -1576,7 +1598,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Canonicalize GE/LE comparisons to use GT/LT comparisons. if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { - if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true + if (C1 == MinVal) return DAG.getConstant(1, dl, VT); // X >= MIN --> true // X >= C0 --> X > (C0 - 1) APInt C = C1 - 1; ISD::CondCode NewCC = (Cond == ISD::SETGE) ? 
ISD::SETGT : ISD::SETUGT; @@ -1585,13 +1607,13 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && isLegalICmpImmediate(C.getSExtValue())))) { return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C, N1.getValueType()), + DAG.getConstant(C, dl, N1.getValueType()), NewCC); } } if (Cond == ISD::SETLE || Cond == ISD::SETULE) { - if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true + if (C1 == MaxVal) return DAG.getConstant(1, dl, VT); // X <= MAX --> true // X <= C0 --> X < (C0 + 1) APInt C = C1 + 1; ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; @@ -1600,19 +1622,19 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && isLegalICmpImmediate(C.getSExtValue())))) { return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C, N1.getValueType()), + DAG.getConstant(C, dl, N1.getValueType()), NewCC); } } if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) - return DAG.getConstant(0, VT); // X < MIN --> false + return DAG.getConstant(0, dl, VT); // X < MIN --> false if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) - return DAG.getConstant(1, VT); // X >= MIN --> true + return DAG.getConstant(1, dl, VT); // X >= MIN --> true if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) - return DAG.getConstant(0, VT); // X > MAX --> false + return DAG.getConstant(0, dl, VT); // X > MAX --> false if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) - return DAG.getConstant(1, VT); // X <= MAX --> true + return DAG.getConstant(1, dl, VT); // X <= MAX --> true // Canonicalize setgt X, Min --> setne X, Min if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) @@ -1624,12 +1646,12 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If we have setult X, 1, turn it into seteq X, 0 if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MinVal, N0.getValueType()), + DAG.getConstant(MinVal, dl, N0.getValueType()), ISD::SETEQ); // If we have setugt X, Max-1, turn it into seteq X, Max if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MaxVal, N0.getValueType()), + DAG.getConstant(MaxVal, dl, N0.getValueType()), ISD::SETEQ); // If we have "setcc X, C0", check to see if we can shrink the immediate @@ -1639,14 +1661,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETUGT && C1 == APInt::getSignedMaxValue(OperandBitSize)) return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(0, N1.getValueType()), + DAG.getConstant(0, dl, N1.getValueType()), ISD::SETLT); // SETULT X, SINTMIN -> SETGT X, -1 if (Cond == ISD::SETULT && C1 == APInt::getSignedMinValue(OperandBitSize)) { SDValue ConstMinusOne = - DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, N1.getValueType()); return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); } @@ -1665,7 +1687,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (AndRHS->getAPIntValue().isPowerOf2()) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy))); + DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl, + ShiftTy))); } } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { // 
(X & 8) == 8 --> (X & 8) >> 3 @@ -1673,7 +1696,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (C1.isPowerOf2()) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(C1.logBase2(), ShiftTy))); + DAG.getConstant(C1.logBase2(), dl, + ShiftTy))); } } } @@ -1692,8 +1716,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), - DAG.getConstant(ShiftBits, ShiftTy)); - SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), CmpTy); + DAG.getConstant(ShiftBits, dl, + ShiftTy)); + SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy); return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); } } @@ -1715,13 +1740,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ShiftBits = C1.countTrailingZeros(); } NewC = NewC.lshr(ShiftBits); - if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) { + if (ShiftBits && NewC.getMinSignedBits() <= 64 && + isLegalICmpImmediate(NewC.getSExtValue())) { EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, - DAG.getConstant(ShiftBits, ShiftTy)); - SDValue CmpRHS = DAG.getConstant(NewC, CmpTy); + DAG.getConstant(ShiftBits, dl, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy); return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); } } @@ -1740,9 +1766,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, switch (ISD::getUnorderedFlavor(Cond)) { default: llvm_unreachable("Unknown flavor!"); case 0: // Known false. - return DAG.getConstant(0, VT); + return DAG.getConstant(0, dl, VT); case 1: // Known true. - return DAG.getConstant(1, VT); + return DAG.getConstant(1, dl, VT); case 2: // Undefined. return DAG.getUNDEF(VT); } @@ -1809,13 +1835,13 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // We can always fold X == X for integer setcc's. if (N0.getValueType().isInteger()) { - return DAG.getConstant(EqVal, VT); + return DAG.getConstant(EqVal, dl, VT); } unsigned UOF = ISD::getUnorderedFlavor(Cond); if (UOF == 2) // FP operators that are undefined on NaNs. - return DAG.getConstant(EqVal, VT); + return DAG.getConstant(EqVal, dl, VT); if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) - return DAG.getConstant(EqVal, VT); + return DAG.getConstant(EqVal, dl, VT); // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO // if it is not already. ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; @@ -1856,7 +1882,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N0.getOperand(0), DAG.getConstant(RHSC->getAPIntValue()- LHSR->getAPIntValue(), - N0.getValueType()), Cond); + dl, N0.getValueType()), Cond); } // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. 
@@ -1868,7 +1894,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getSetCC(dl, VT, N0.getOperand(0), DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(), - N0.getValueType()), + dl, N0.getValueType()), Cond); } @@ -1879,7 +1905,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getSetCC(dl, VT, N0.getOperand(1), DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(), - N0.getValueType()), + dl, N0.getValueType()), Cond); } } @@ -1896,16 +1922,18 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (!LegalRHSImm || N0.getNode()->hasOneUse()) { if (N0.getOperand(0) == N1) return DAG.getSetCC(dl, VT, N0.getOperand(1), - DAG.getConstant(0, N0.getValueType()), Cond); + DAG.getConstant(0, dl, N0.getValueType()), Cond); if (N0.getOperand(1) == N1) { if (DAG.isCommutativeBinOp(N0.getOpcode())) return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(0, N0.getValueType()), Cond); + DAG.getConstant(0, dl, N0.getValueType()), + Cond); if (N0.getNode()->hasOneUse()) { assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); // (Z-X) == X --> Z == X<<1 SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1, - DAG.getConstant(1, getShiftAmountTy(N1.getValueType()))); + DAG.getConstant(1, dl, + getShiftAmountTy(N1.getValueType()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); @@ -1919,16 +1947,17 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Simplify X == (X+Z) --> Z == 0 if (N1.getOperand(0) == N0) return DAG.getSetCC(dl, VT, N1.getOperand(1), - DAG.getConstant(0, N1.getValueType()), Cond); + DAG.getConstant(0, dl, N1.getValueType()), Cond); if (N1.getOperand(1) == N0) { if (DAG.isCommutativeBinOp(N1.getOpcode())) return DAG.getSetCC(dl, VT, N1.getOperand(0), - DAG.getConstant(0, N1.getValueType()), Cond); + DAG.getConstant(0, dl, N1.getValueType()), Cond); if (N1.getNode()->hasOneUse()) { assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); // X == (Z-X) --> X<<1 == Z SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, - DAG.getConstant(1, getShiftAmountTy(N0.getValueType()))); + DAG.getConstant(1, dl, + getShiftAmountTy(N0.getValueType()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); @@ -1946,7 +1975,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); if (DCI.isBeforeLegalizeOps() || isCondCodeLegal(Cond, N0.getSimpleValueType())) { - SDValue Zero = DAG.getConstant(0, N1.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, N1.getValueType()); return DAG.getSetCC(dl, VT, N0, Zero, Cond); } } @@ -1957,7 +1986,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); if (DCI.isBeforeLegalizeOps() || isCondCodeLegal(Cond, N1.getSimpleValueType())) { - SDValue Zero = DAG.getConstant(0, N0.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); return DAG.getSetCC(dl, VT, N1, Zero, Cond); } } @@ -2172,7 +2201,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // now; without this it would get ZExt'd later in // ScheduleDAGSDNodes::EmitNode, which is very generic. 
Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(), - MVT::i64)); + SDLoc(C), MVT::i64)); return; } } @@ -2181,9 +2210,10 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } } -std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { +std::pair<unsigned, const TargetRegisterClass *> +TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, + const std::string &Constraint, + MVT VT) const { if (Constraint.empty() || Constraint[0] != '{') return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr)); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); @@ -2195,8 +2225,6 @@ getRegForInlineAsmConstraint(const std::string &Constraint, std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr)); // Figure out which register class contains this reg. - const TargetRegisterInfo *RI = - getTargetMachine().getSubtargetImpl()->getRegisterInfo(); for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), E = RI->regclass_end(); RCI != E; ++RCI) { const TargetRegisterClass *RC = *RCI; @@ -2249,8 +2277,9 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { /// and also tie in the associated operand values. /// If this returns an empty vector, and if the constraint string itself /// isn't empty, there was an error parsing. -TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( - ImmutableCallSite CS) const { +TargetLowering::AsmOperandInfoVector +TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI, + ImmutableCallSite CS) const { /// ConstraintOperands - Information about all of the constraints. AsmOperandInfoVector ConstraintOperands; const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); @@ -2341,7 +2370,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( } // If we have multiple alternative constraints, select the best alternative. - if (ConstraintOperands.size()) { + if (!ConstraintOperands.empty()) { if (maCount) { unsigned bestMAIndex = 0; int bestWeight = -1; @@ -2412,12 +2441,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - std::pair<unsigned, const TargetRegisterClass*> MatchRC = - getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - std::pair<unsigned, const TargetRegisterClass*> InputRC = - getRegForInlineAsmConstraint(Input.ConstraintCode, - Input.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> MatchRC = + getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> InputRC = + getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { @@ -2640,7 +2669,8 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, unsigned ShAmt = d.countTrailingZeros(); if (ShAmt) { // TODO: For UDIV use SRL instead of SRA. 
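For orientation inside this BuildExactSDIV hunk: the SRA just below shifts out the divisor's trailing zero bits, and the `xn *= 2 - d*xn` loop that follows is Newton's iteration for the multiplicative inverse of the remaining odd factor modulo 2^bits; each step doubles the number of correct low bits, so the loop terminates quickly and an exact division becomes a single multiply. A standalone editorial sketch on fixed 32-bit values, including the magic-multiply form that the BuildUDIV hunk further below selects for the non-exact case (all names illustrative):

#include <cassert>
#include <cstdint>

// Inverse of an odd d modulo 2^32, mirroring the loop in BuildExactSDIV.
uint32_t inverse_mod_2_32(uint32_t d) {
  assert((d & 1) && "only odd values are invertible modulo 2^32");
  uint32_t x = 1; // correct to 1 low bit; each iteration doubles that
  while (d * x != 1)
    x *= 2 - d * x;
  return x;
}

// Exact division as a multiply; valid only when d_odd divides n, which is
// what the 'exact' flag on the original division guarantees. The same
// product works for signed operands because everything is modulo 2^32.
uint32_t exact_div(uint32_t n, uint32_t d_odd) {
  return n * inverse_mod_2_32(d_odd);
}

// Non-exact unsigned division instead uses the high half of a widened
// multiply (MULHU) with a precomputed magic constant, e.g. n/5 via
// 0xCCCCCCCD = ceil(2^34 / 5) followed by a shift of 2.
uint32_t udiv5(uint32_t n) {
  return static_cast<uint32_t>((static_cast<uint64_t>(n) * 0xCCCCCCCDu) >> 32) >> 2;
}

int main() {
  assert(exact_div(35, 7) == 5);
  assert(udiv5(12345) == 2469);
  return 0;
}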
- SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType())); + SDValue Amt = + DAG.getConstant(ShAmt, dl, getShiftAmountTy(Op1.getValueType())); Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, false, false, true); d = d.ashr(ShAmt); @@ -2651,7 +2681,7 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, while ((t = d*xn) != 1) xn *= APInt(d.getBitWidth(), 2) - t; - Op2 = DAG.getConstant(xn, Op1.getValueType()); + Op2 = DAG.getConstant(xn, dl, Op1.getValueType()); return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2); } @@ -2680,12 +2710,12 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) : isOperationLegalOrCustom(ISD::MULHS, VT)) Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0), - DAG.getConstant(magics.m, VT)); + DAG.getConstant(magics.m, dl, VT)); else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N->getOperand(0), - DAG.getConstant(magics.m, VT)).getNode(), 1); + DAG.getConstant(magics.m, dl, VT)).getNode(), 1); else return SDValue(); // No mulhs or equvialent // If d > 0 and m < 0, add the numerator @@ -2701,12 +2731,13 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, // Shift right algebraic if shift value is nonzero if (magics.s > 0) { Q = DAG.getNode(ISD::SRA, dl, VT, Q, - DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); + DAG.getConstant(magics.s, dl, + getShiftAmountTy(Q.getValueType()))); Created->push_back(Q.getNode()); } // Extract the sign bit and add it to the quotient SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(VT.getScalarSizeInBits() - 1, + DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, getShiftAmountTy(Q.getValueType()))); Created->push_back(T.getNode()); return DAG.getNode(ISD::ADD, dl, VT, Q, T); @@ -2740,7 +2771,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, if (magics.a != 0 && !Divisor[0]) { unsigned Shift = Divisor.countTrailingZeros(); Q = DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType()))); + DAG.getConstant(Shift, dl, + getShiftAmountTy(Q.getValueType()))); Created->push_back(Q.getNode()); // Get magic number for the shifted divisor. @@ -2752,11 +2784,11 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, // FIXME: We should support doing a MUL in a wider type if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) : isOperationLegalOrCustom(ISD::MULHU, VT)) - Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT)); + Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, dl, VT)); else if (IsAfterLegalization ? 
isOperationLegal(ISD::UMUL_LOHI, VT) : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, - DAG.getConstant(magics.m, VT)).getNode(), 1); + DAG.getConstant(magics.m, dl, VT)).getNode(), 1); else return SDValue(); // No mulhu or equvialent @@ -2766,17 +2798,20 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, assert(magics.s < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); return DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); + DAG.getConstant(magics.s, dl, + getShiftAmountTy(Q.getValueType()))); } else { SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, - DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType()))); + DAG.getConstant(1, dl, + getShiftAmountTy(NPQ.getValueType()))); Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); Created->push_back(NPQ.getNode()); return DAG.getNode(ISD::SRL, dl, VT, NPQ, - DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType()))); + DAG.getConstant(magics.s - 1, dl, + getShiftAmountTy(NPQ.getValueType()))); } } @@ -2863,7 +2898,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, isOperationLegalOrCustom(ISD::SRL, VT) && isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits(); - SDValue Shift = DAG.getConstant(ShiftAmt, getShiftAmountTy(VT)); + SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT)); LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift); LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH); RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift); @@ -2913,13 +2948,13 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); - SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT); - SDValue ExponentLoBit = DAG.getConstant(23, IntVT); - SDValue Bias = DAG.getConstant(127, IntVT); - SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), + SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT); + SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT); + SDValue Bias = DAG.getConstant(127, dl, IntVT); + SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), dl, IntVT); - SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT); - SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT); + SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT); + SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT); SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); @@ -2935,7 +2970,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SDValue R = DAG.getNode(ISD::OR, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), - DAG.getConstant(0x00800000, IntVT)); + DAG.getConstant(0x00800000, dl, IntVT)); R = DAG.getZExtOrTrunc(R, dl, NVT); @@ -2955,7 +2990,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, DAG.getNode(ISD::XOR, dl, NVT, R, Sign), Sign); - Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT), - DAG.getConstant(0, NVT), Ret, ISD::SETLT); + Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT), + 
DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT); return true; } diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp index 0be00f0..b12e943 100644 --- a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp +++ b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp @@ -38,416 +38,18 @@ using namespace llvm; #define DEBUG_TYPE "shadowstackgc" namespace { - - class ShadowStackGC : public GCStrategy { - /// RootChain - This is the global linked-list that contains the chain of GC - /// roots. - GlobalVariable *Head; - - /// StackEntryTy - Abstract type of a link in the shadow stack. - /// - StructType *StackEntryTy; - StructType *FrameMapTy; - - /// Roots - GC roots in the current function. Each is a pair of the - /// intrinsic call and its corresponding alloca. - std::vector<std::pair<CallInst*,AllocaInst*> > Roots; - - public: - ShadowStackGC(); - - bool initializeCustomLowering(Module &M) override; - bool performCustomLowering(Function &F) override; - - private: - bool IsNullValue(Value *V); - Constant *GetFrameMap(Function &F); - Type* GetConcreteStackEntryType(Function &F); - void CollectRoots(Function &F); - static GetElementPtrInst *CreateGEP(LLVMContext &Context, - IRBuilder<> &B, Value *BasePtr, - int Idx1, const char *Name); - static GetElementPtrInst *CreateGEP(LLVMContext &Context, - IRBuilder<> &B, Value *BasePtr, - int Idx1, int Idx2, const char *Name); - }; - +class ShadowStackGC : public GCStrategy { +public: + ShadowStackGC(); +}; } static GCRegistry::Add<ShadowStackGC> -X("shadow-stack", "Very portable GC for uncooperative code generators"); - -namespace { - /// EscapeEnumerator - This is a little algorithm to find all escape points - /// from a function so that "finally"-style code can be inserted. In addition - /// to finding the existing return and unwind instructions, it also (if - /// necessary) transforms any call instructions into invokes and sends them to - /// a landing pad. - /// - /// It's wrapped up in a state machine using the same transform C# uses for - /// 'yield return' enumerators, This transform allows it to be non-allocating. - class EscapeEnumerator { - Function &F; - const char *CleanupBBName; - - // State. - int State; - Function::iterator StateBB, StateE; - IRBuilder<> Builder; - - public: - EscapeEnumerator(Function &F, const char *N = "cleanup") - : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {} - - IRBuilder<> *Next() { - switch (State) { - default: - return nullptr; - - case 0: - StateBB = F.begin(); - StateE = F.end(); - State = 1; - - case 1: - // Find all 'return', 'resume', and 'unwind' instructions. - while (StateBB != StateE) { - BasicBlock *CurBB = StateBB++; - - // Branches and invokes do not escape, only unwind, resume, and return - // do. - TerminatorInst *TI = CurBB->getTerminator(); - if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) - continue; - - Builder.SetInsertPoint(TI->getParent(), TI); - return &Builder; - } - - State = 2; - - // Find all 'call' instructions. - SmallVector<Instruction*,16> Calls; - for (Function::iterator BB = F.begin(), - E = F.end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), - EE = BB->end(); II != EE; ++II) - if (CallInst *CI = dyn_cast<CallInst>(II)) - if (!CI->getCalledFunction() || - !CI->getCalledFunction()->getIntrinsicID()) - Calls.push_back(CI); - - if (Calls.empty()) - return nullptr; + X("shadow-stack", "Very portable GC for uncooperative code generators"); - // Create a cleanup block. 
- LLVMContext &C = F.getContext(); - BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); - Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), - Type::getInt32Ty(C), nullptr); - Constant *PersFn = - F.getParent()-> - getOrInsertFunction("__gcc_personality_v0", - FunctionType::get(Type::getInt32Ty(C), true)); - LandingPadInst *LPad = LandingPadInst::Create(ExnTy, PersFn, 1, - "cleanup.lpad", - CleanupBB); - LPad->setCleanup(true); - ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB); +void llvm::linkShadowStackGC() {} - // Transform the 'call' instructions into 'invoke's branching to the - // cleanup block. Go in reverse order to make prettier BB names. - SmallVector<Value*,16> Args; - for (unsigned I = Calls.size(); I != 0; ) { - CallInst *CI = cast<CallInst>(Calls[--I]); - - // Split the basic block containing the function call. - BasicBlock *CallBB = CI->getParent(); - BasicBlock *NewBB = - CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont"); - - // Remove the unconditional branch inserted at the end of CallBB. - CallBB->getInstList().pop_back(); - NewBB->getInstList().remove(CI); - - // Create a new invoke instruction. - Args.clear(); - CallSite CS(CI); - Args.append(CS.arg_begin(), CS.arg_end()); - - InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), - NewBB, CleanupBB, - Args, CI->getName(), CallBB); - II->setCallingConv(CI->getCallingConv()); - II->setAttributes(CI->getAttributes()); - CI->replaceAllUsesWith(II); - delete CI; - } - - Builder.SetInsertPoint(RI->getParent(), RI); - return &Builder; - } - } - }; -} - -// ----------------------------------------------------------------------------- - -void llvm::linkShadowStackGC() { } - -ShadowStackGC::ShadowStackGC() : Head(nullptr), StackEntryTy(nullptr) { +ShadowStackGC::ShadowStackGC() { InitRoots = true; CustomRoots = true; } - -Constant *ShadowStackGC::GetFrameMap(Function &F) { - // doInitialization creates the abstract type of this value. - Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); - - // Truncate the ShadowStackDescriptor if some metadata is null. - unsigned NumMeta = 0; - SmallVector<Constant*, 16> Metadata; - for (unsigned I = 0; I != Roots.size(); ++I) { - Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1)); - if (!C->isNullValue()) - NumMeta = I + 1; - Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); - } - Metadata.resize(NumMeta); - - Type *Int32Ty = Type::getInt32Ty(F.getContext()); - - Constant *BaseElts[] = { - ConstantInt::get(Int32Ty, Roots.size(), false), - ConstantInt::get(Int32Ty, NumMeta, false), - }; - - Constant *DescriptorElts[] = { - ConstantStruct::get(FrameMapTy, BaseElts), - ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata) - }; - - Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()}; - StructType *STy = StructType::create(EltTys, "gc_map."+utostr(NumMeta)); - - Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts); - - // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems - // that, short of multithreaded LLVM, it should be safe; all that is - // necessary is that a simple Module::iterator loop not be invalidated. - // Appending to the GlobalVariable list is safe in that sense. - // - // All of the output passes emit globals last. The ExecutionEngine - // explicitly supports adding globals to the module after - // initialization. 
- // - // Still, if it isn't deemed acceptable, then this transformation needs - // to be a ModulePass (which means it cannot be in the 'llc' pipeline - // (which uses a FunctionPassManager (which segfaults (not asserts) if - // provided a ModulePass))). - Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true, - GlobalVariable::InternalLinkage, - FrameMap, "__gc_" + F.getName()); - - Constant *GEPIndices[2] = { - ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), - ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) - }; - return ConstantExpr::getGetElementPtr(GV, GEPIndices); -} - -Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) { - // doInitialization creates the generic version of this type. - std::vector<Type*> EltTys; - EltTys.push_back(StackEntryTy); - for (size_t I = 0; I != Roots.size(); I++) - EltTys.push_back(Roots[I].second->getAllocatedType()); - - return StructType::create(EltTys, "gc_stackentry."+F.getName().str()); -} - -/// doInitialization - If this module uses the GC intrinsics, find them now. If -/// not, exit fast. -bool ShadowStackGC::initializeCustomLowering(Module &M) { - // struct FrameMap { - // int32_t NumRoots; // Number of roots in stack frame. - // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. - // void *Meta[]; // May be absent for roots without metadata. - // }; - std::vector<Type*> EltTys; - // 32 bits is ok up to a 32GB stack frame. :) - EltTys.push_back(Type::getInt32Ty(M.getContext())); - // Specifies length of variable length array. - EltTys.push_back(Type::getInt32Ty(M.getContext())); - FrameMapTy = StructType::create(EltTys, "gc_map"); - PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); - - // struct StackEntry { - // ShadowStackEntry *Next; // Caller's stack entry. - // FrameMap *Map; // Pointer to constant FrameMap. - // void *Roots[]; // Stack roots (in-place array, so we pretend). - // }; - - StackEntryTy = StructType::create(M.getContext(), "gc_stackentry"); - - EltTys.clear(); - EltTys.push_back(PointerType::getUnqual(StackEntryTy)); - EltTys.push_back(FrameMapPtrTy); - StackEntryTy->setBody(EltTys); - PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); - - // Get the root chain if it already exists. - Head = M.getGlobalVariable("llvm_gc_root_chain"); - if (!Head) { - // If the root chain does not exist, insert a new one with linkonce - // linkage! - Head = new GlobalVariable(M, StackEntryPtrTy, false, - GlobalValue::LinkOnceAnyLinkage, - Constant::getNullValue(StackEntryPtrTy), - "llvm_gc_root_chain"); - } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { - Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); - Head->setLinkage(GlobalValue::LinkOnceAnyLinkage); - } - - return true; -} - -bool ShadowStackGC::IsNullValue(Value *V) { - if (Constant *C = dyn_cast<Constant>(V)) - return C->isNullValue(); - return false; -} - -void ShadowStackGC::CollectRoots(Function &F) { - // FIXME: Account for original alignment. Could fragment the root array. - // Approach 1: Null initialize empty slots at runtime. Yuck. - // Approach 2: Emit a map of the array instead of just a count. 
- - assert(Roots.empty() && "Not cleaned up?"); - - SmallVector<std::pair<CallInst*, AllocaInst*>, 16> MetaRoots; - - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) - if (Function *F = CI->getCalledFunction()) - if (F->getIntrinsicID() == Intrinsic::gcroot) { - std::pair<CallInst*, AllocaInst*> Pair = std::make_pair( - CI, cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); - if (IsNullValue(CI->getArgOperand(1))) - Roots.push_back(Pair); - else - MetaRoots.push_back(Pair); - } - - // Number roots with metadata (usually empty) at the beginning, so that the - // FrameMap::Meta array can be elided. - Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end()); -} - -GetElementPtrInst * -ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, - int Idx, int Idx2, const char *Name) { - Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), - ConstantInt::get(Type::getInt32Ty(Context), Idx), - ConstantInt::get(Type::getInt32Ty(Context), Idx2) }; - Value* Val = B.CreateGEP(BasePtr, Indices, Name); - - assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); - - return dyn_cast<GetElementPtrInst>(Val); -} - -GetElementPtrInst * -ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, - int Idx, const char *Name) { - Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), - ConstantInt::get(Type::getInt32Ty(Context), Idx) }; - Value *Val = B.CreateGEP(BasePtr, Indices, Name); - - assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); - - return dyn_cast<GetElementPtrInst>(Val); -} - -/// runOnFunction - Insert code to maintain the shadow stack. -bool ShadowStackGC::performCustomLowering(Function &F) { - LLVMContext &Context = F.getContext(); - - // Find calls to llvm.gcroot. - CollectRoots(F); - - // If there are no roots in this function, then there is no need to add a - // stack map entry for it. - if (Roots.empty()) - return false; - - // Build the constant map and figure the type of the shadow stack entry. - Value *FrameMap = GetFrameMap(F); - Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); - - // Build the shadow stack entry at the very start of the function. - BasicBlock::iterator IP = F.getEntryBlock().begin(); - IRBuilder<> AtEntry(IP->getParent(), IP); - - Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, - "gc_frame"); - - while (isa<AllocaInst>(IP)) ++IP; - AtEntry.SetInsertPoint(IP->getParent(), IP); - - // Initialize the map pointer and load the current head of the shadow stack. - Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); - Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry, - 0,1,"gc_frame.map"); - AtEntry.CreateStore(FrameMap, EntryMapPtr); - - // After all the allocas... - for (unsigned I = 0, E = Roots.size(); I != E; ++I) { - // For each root, find the corresponding slot in the aggregate... - Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root"); - - // And use it in lieu of the alloca. - AllocaInst *OriginalAlloca = Roots[I].second; - SlotPtr->takeName(OriginalAlloca); - OriginalAlloca->replaceAllUsesWith(SlotPtr); - } - - // Move past the original stores inserted by GCStrategy::InitRoots. 
This isn't - // really necessary (the collector would never see the intermediate state at - // runtime), but it's nicer not to push the half-initialized entry onto the - // shadow stack. - while (isa<StoreInst>(IP)) ++IP; - AtEntry.SetInsertPoint(IP->getParent(), IP); - - // Push the entry onto the shadow stack. - Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, - StackEntry,0,0,"gc_frame.next"); - Instruction *NewHeadVal = CreateGEP(Context, AtEntry, - StackEntry, 0, "gc_newhead"); - AtEntry.CreateStore(CurrentHead, EntryNextPtr); - AtEntry.CreateStore(NewHeadVal, Head); - - // For each instruction that escapes... - EscapeEnumerator EE(F, "gc_cleanup"); - while (IRBuilder<> *AtExit = EE.Next()) { - // Pop the entry from the shadow stack. Don't reuse CurrentHead from - // AtEntry, since that would make the value live for the entire function. - Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0, - "gc_frame.next"); - Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); - AtExit->CreateStore(SavedHead, Head); - } - - // Delete the original allocas (which are no longer used) and the intrinsic - // calls (which are no longer valid). Doing this last avoids invalidating - // iterators. - for (unsigned I = 0, E = Roots.size(); I != E; ++I) { - Roots[I].first->eraseFromParent(); - Roots[I].second->eraseFromParent(); - } - - Roots.clear(); - return true; -} diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp new file mode 100644 index 0000000..7c0b2bb --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -0,0 +1,460 @@ +//===-- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the custom lowering code required by the shadow-stack GC +// strategy. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +#define DEBUG_TYPE "shadowstackgclowering" + +namespace { + +class ShadowStackGCLowering : public FunctionPass { + /// RootChain - This is the global linked-list that contains the chain of GC + /// roots. + GlobalVariable *Head; + + /// StackEntryTy - Abstract type of a link in the shadow stack. + /// + StructType *StackEntryTy; + StructType *FrameMapTy; + + /// Roots - GC roots in the current function. Each is a pair of the + /// intrinsic call and its corresponding alloca. 
+ std::vector<std::pair<CallInst *, AllocaInst *>> Roots; + +public: + static char ID; + ShadowStackGCLowering(); + + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; + +private: + bool IsNullValue(Value *V); + Constant *GetFrameMap(Function &F); + Type *GetConcreteStackEntryType(Function &F); + void CollectRoots(Function &F); + static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B, + Type *Ty, Value *BasePtr, int Idx1, + const char *Name); + static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B, + Type *Ty, Value *BasePtr, int Idx1, int Idx2, + const char *Name); +}; +} + +INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, "shadow-stack-gc-lowering", + "Shadow Stack GC Lowering", false, false) +INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) +INITIALIZE_PASS_END(ShadowStackGCLowering, "shadow-stack-gc-lowering", + "Shadow Stack GC Lowering", false, false) + +FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); } + +char ShadowStackGCLowering::ID = 0; + +ShadowStackGCLowering::ShadowStackGCLowering() + : FunctionPass(ID), Head(nullptr), StackEntryTy(nullptr), + FrameMapTy(nullptr) { + initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry()); +} + +namespace { +/// EscapeEnumerator - This is a little algorithm to find all escape points +/// from a function so that "finally"-style code can be inserted. In addition +/// to finding the existing return and unwind instructions, it also (if +/// necessary) transforms any call instructions into invokes and sends them to +/// a landing pad. +/// +/// It's wrapped up in a state machine using the same transform C# uses for +/// 'yield return' enumerators, This transform allows it to be non-allocating. +class EscapeEnumerator { + Function &F; + const char *CleanupBBName; + + // State. + int State; + Function::iterator StateBB, StateE; + IRBuilder<> Builder; + +public: + EscapeEnumerator(Function &F, const char *N = "cleanup") + : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {} + + IRBuilder<> *Next() { + switch (State) { + default: + return nullptr; + + case 0: + StateBB = F.begin(); + StateE = F.end(); + State = 1; + + case 1: + // Find all 'return', 'resume', and 'unwind' instructions. + while (StateBB != StateE) { + BasicBlock *CurBB = StateBB++; + + // Branches and invokes do not escape, only unwind, resume, and return + // do. + TerminatorInst *TI = CurBB->getTerminator(); + if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) + continue; + + Builder.SetInsertPoint(TI->getParent(), TI); + return &Builder; + } + + State = 2; + + // Find all 'call' instructions. + SmallVector<Instruction *, 16> Calls; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); II != EE; + ++II) + if (CallInst *CI = dyn_cast<CallInst>(II)) + if (!CI->getCalledFunction() || + !CI->getCalledFunction()->getIntrinsicID()) + Calls.push_back(CI); + + if (Calls.empty()) + return nullptr; + + // Create a cleanup block. 
+ LLVMContext &C = F.getContext(); + BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); + Type *ExnTy = + StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr); + Constant *PersFn = F.getParent()->getOrInsertFunction( + "__gcc_personality_v0", FunctionType::get(Type::getInt32Ty(C), true)); + LandingPadInst *LPad = + LandingPadInst::Create(ExnTy, PersFn, 1, "cleanup.lpad", CleanupBB); + LPad->setCleanup(true); + ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB); + + // Transform the 'call' instructions into 'invoke's branching to the + // cleanup block. Go in reverse order to make prettier BB names. + SmallVector<Value *, 16> Args; + for (unsigned I = Calls.size(); I != 0;) { + CallInst *CI = cast<CallInst>(Calls[--I]); + + // Split the basic block containing the function call. + BasicBlock *CallBB = CI->getParent(); + BasicBlock *NewBB = + CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont"); + + // Remove the unconditional branch inserted at the end of CallBB. + CallBB->getInstList().pop_back(); + NewBB->getInstList().remove(CI); + + // Create a new invoke instruction. + Args.clear(); + CallSite CS(CI); + Args.append(CS.arg_begin(), CS.arg_end()); + + InvokeInst *II = + InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, Args, + CI->getName(), CallBB); + II->setCallingConv(CI->getCallingConv()); + II->setAttributes(CI->getAttributes()); + CI->replaceAllUsesWith(II); + delete CI; + } + + Builder.SetInsertPoint(RI->getParent(), RI); + return &Builder; + } + } +}; +} + + +Constant *ShadowStackGCLowering::GetFrameMap(Function &F) { + // doInitialization creates the abstract type of this value. + Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); + + // Truncate the ShadowStackDescriptor if some metadata is null. + unsigned NumMeta = 0; + SmallVector<Constant *, 16> Metadata; + for (unsigned I = 0; I != Roots.size(); ++I) { + Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1)); + if (!C->isNullValue()) + NumMeta = I + 1; + Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); + } + Metadata.resize(NumMeta); + + Type *Int32Ty = Type::getInt32Ty(F.getContext()); + + Constant *BaseElts[] = { + ConstantInt::get(Int32Ty, Roots.size(), false), + ConstantInt::get(Int32Ty, NumMeta, false), + }; + + Constant *DescriptorElts[] = { + ConstantStruct::get(FrameMapTy, BaseElts), + ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)}; + + Type *EltTys[] = {DescriptorElts[0]->getType(), DescriptorElts[1]->getType()}; + StructType *STy = StructType::create(EltTys, "gc_map." + utostr(NumMeta)); + + Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts); + + // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems + // that, short of multithreaded LLVM, it should be safe; all that is + // necessary is that a simple Module::iterator loop not be invalidated. + // Appending to the GlobalVariable list is safe in that sense. + // + // All of the output passes emit globals last. The ExecutionEngine + // explicitly supports adding globals to the module after + // initialization. + // + // Still, if it isn't deemed acceptable, then this transformation needs + // to be a ModulePass (which means it cannot be in the 'llc' pipeline + // (which uses a FunctionPassManager (which segfaults (not asserts) if + // provided a ModulePass))). 
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true, + GlobalVariable::InternalLinkage, FrameMap, + "__gc_" + F.getName()); + + Constant *GEPIndices[2] = { + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)}; + return ConstantExpr::getGetElementPtr(FrameMap->getType(), GV, GEPIndices); +} + +Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) { + // doInitialization creates the generic version of this type. + std::vector<Type *> EltTys; + EltTys.push_back(StackEntryTy); + for (size_t I = 0; I != Roots.size(); I++) + EltTys.push_back(Roots[I].second->getAllocatedType()); + + return StructType::create(EltTys, ("gc_stackentry." + F.getName()).str()); +} + +/// doInitialization - If this module uses the GC intrinsics, find them now. If +/// not, exit fast. +bool ShadowStackGCLowering::doInitialization(Module &M) { + bool Active = false; + for (Function &F : M) { + if (F.hasGC() && F.getGC() == std::string("shadow-stack")) { + Active = true; + break; + } + } + if (!Active) + return false; + + // struct FrameMap { + // int32_t NumRoots; // Number of roots in stack frame. + // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. + // void *Meta[]; // May be absent for roots without metadata. + // }; + std::vector<Type *> EltTys; + // 32 bits is ok up to a 32GB stack frame. :) + EltTys.push_back(Type::getInt32Ty(M.getContext())); + // Specifies length of variable length array. + EltTys.push_back(Type::getInt32Ty(M.getContext())); + FrameMapTy = StructType::create(EltTys, "gc_map"); + PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); + + // struct StackEntry { + // ShadowStackEntry *Next; // Caller's stack entry. + // FrameMap *Map; // Pointer to constant FrameMap. + // void *Roots[]; // Stack roots (in-place array, so we pretend). + // }; + + StackEntryTy = StructType::create(M.getContext(), "gc_stackentry"); + + EltTys.clear(); + EltTys.push_back(PointerType::getUnqual(StackEntryTy)); + EltTys.push_back(FrameMapPtrTy); + StackEntryTy->setBody(EltTys); + PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); + + // Get the root chain if it already exists. + Head = M.getGlobalVariable("llvm_gc_root_chain"); + if (!Head) { + // If the root chain does not exist, insert a new one with linkonce + // linkage! + Head = new GlobalVariable( + M, StackEntryPtrTy, false, GlobalValue::LinkOnceAnyLinkage, + Constant::getNullValue(StackEntryPtrTy), "llvm_gc_root_chain"); + } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { + Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); + Head->setLinkage(GlobalValue::LinkOnceAnyLinkage); + } + + return true; +} + +bool ShadowStackGCLowering::IsNullValue(Value *V) { + if (Constant *C = dyn_cast<Constant>(V)) + return C->isNullValue(); + return false; +} + +void ShadowStackGCLowering::CollectRoots(Function &F) { + // FIXME: Account for original alignment. Could fragment the root array. + // Approach 1: Null initialize empty slots at runtime. Yuck. + // Approach 2: Emit a map of the array instead of just a count. 
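The struct FrameMap / struct StackEntry comments in doInitialization above are the whole contract with the collector's runtime: the lowered code maintains a linked list of frames rooted at `llvm_gc_root_chain`. A sketch of that runtime side, modeled on the shadow-stack walker in LLVM's GC documentation; treat it as an assumption-labeled mirror of those IR types (the flexible array members are the C99-ism Clang and GCC also accept in C++), not code from this tree:

#include <cstdint>

struct FrameMap {
  int32_t NumRoots;   // Number of roots in the stack frame.
  int32_t NumMeta;    // Number of metadata entries; may be < NumRoots.
  const void *Meta[]; // Metadata for the first NumMeta roots.
};

struct StackEntry {
  StackEntry *Next;    // Caller's stack entry.
  const FrameMap *Map; // Pointer to the constant FrameMap.
  void *Roots[];       // Stack roots, allocated in-place.
};

extern "C" StackEntry *llvm_gc_root_chain;

void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
  for (StackEntry *R = llvm_gc_root_chain; R; R = R->Next) {
    unsigned i = 0;
    // Roots with metadata come first; CollectRoots below orders them that
    // way precisely so the Meta array can be truncated.
    for (unsigned e = R->Map->NumMeta; i != e; ++i)
      Visitor(&R->Roots[i], R->Map->Meta[i]);
    for (unsigned e = R->Map->NumRoots; i != e; ++i)
      Visitor(&R->Roots[i], nullptr);
  }
}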
+ + assert(Roots.empty() && "Not cleaned up?"); + + SmallVector<std::pair<CallInst *, AllocaInst *>, 16> MetaRoots; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) + if (Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::gcroot) { + std::pair<CallInst *, AllocaInst *> Pair = std::make_pair( + CI, + cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); + if (IsNullValue(CI->getArgOperand(1))) + Roots.push_back(Pair); + else + MetaRoots.push_back(Pair); + } + + // Number roots with metadata (usually empty) at the beginning, so that the + // FrameMap::Meta array can be elided. + Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end()); +} + +GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, + IRBuilder<> &B, Type *Ty, + Value *BasePtr, int Idx, + int Idx2, + const char *Name) { + Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx), + ConstantInt::get(Type::getInt32Ty(Context), Idx2)}; + Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name); + + assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); + + return dyn_cast<GetElementPtrInst>(Val); +} + +GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, + IRBuilder<> &B, Type *Ty, Value *BasePtr, + int Idx, const char *Name) { + Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx)}; + Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name); + + assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); + + return dyn_cast<GetElementPtrInst>(Val); +} + +/// runOnFunction - Insert code to maintain the shadow stack. +bool ShadowStackGCLowering::runOnFunction(Function &F) { + // Quick exit for functions that do not use the shadow stack GC. + if (!F.hasGC() || + F.getGC() != std::string("shadow-stack")) + return false; + + LLVMContext &Context = F.getContext(); + + // Find calls to llvm.gcroot. + CollectRoots(F); + + // If there are no roots in this function, then there is no need to add a + // stack map entry for it. + if (Roots.empty()) + return false; + + // Build the constant map and figure the type of the shadow stack entry. + Value *FrameMap = GetFrameMap(F); + Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); + + // Build the shadow stack entry at the very start of the function. + BasicBlock::iterator IP = F.getEntryBlock().begin(); + IRBuilder<> AtEntry(IP->getParent(), IP); + + Instruction *StackEntry = + AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, "gc_frame"); + + while (isa<AllocaInst>(IP)) + ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Initialize the map pointer and load the current head of the shadow stack. + Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); + Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 0, 1, "gc_frame.map"); + AtEntry.CreateStore(FrameMap, EntryMapPtr); + + // After all the allocas... + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + // For each root, find the corresponding slot in the aggregate... + Value *SlotPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 1 + I, "gc_root"); + + // And use it in lieu of the alloca. 
+ AllocaInst *OriginalAlloca = Roots[I].second; + SlotPtr->takeName(OriginalAlloca); + OriginalAlloca->replaceAllUsesWith(SlotPtr); + } + + // Move past the original stores inserted by GCStrategy::InitRoots. This isn't + // really necessary (the collector would never see the intermediate state at + // runtime), but it's nicer not to push the half-initialized entry onto the + // shadow stack. + while (isa<StoreInst>(IP)) + ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Push the entry onto the shadow stack. + Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 0, 0, "gc_frame.next"); + Instruction *NewHeadVal = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 0, "gc_newhead"); + AtEntry.CreateStore(CurrentHead, EntryNextPtr); + AtEntry.CreateStore(NewHeadVal, Head); + + // For each instruction that escapes... + EscapeEnumerator EE(F, "gc_cleanup"); + while (IRBuilder<> *AtExit = EE.Next()) { + // Pop the entry from the shadow stack. Don't reuse CurrentHead from + // AtEntry, since that would make the value live for the entire function. + Instruction *EntryNextPtr2 = + CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0, + "gc_frame.next"); + Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); + AtExit->CreateStore(SavedHead, Head); + } + + // Delete the original allocas (which are no longer used) and the intrinsic + // calls (which are no longer valid). Doing this last avoids invalidating + // iterators. + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + Roots[I].first->eraseFromParent(); + Roots[I].second->eraseFromParent(); + } + + Roots.clear(); + return true; +} diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp new file mode 100644 index 0000000..4463cc7 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -0,0 +1,388 @@ +//===-- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass looks for safe points where the prologue and epilogue can be +// inserted. +// The safe point for the prologue (resp. epilogue) is called Save +// (resp. Restore). +// A point is safe for prologue (resp. epilogue) if and only if +// it 1) dominates (resp. post-dominates) all the frame-related operations and +// between 2) two executions of the Save (resp. Restore) point there is an +// execution of the Restore (resp. Save) point. +// +// For instance, the following points are safe: +// for (int i = 0; i < 10; ++i) { +// Save +// ... +// Restore +// } +// Indeed, the execution looks like Save -> Restore -> Save -> Restore ... +// And the following points are not: +// for (int i = 0; i < 10; ++i) { +// Save +// ... +// } +// for (int i = 0; i < 10; ++i) { +// ... +// Restore +// } +// Indeed, the execution looks like Save -> Save -> ... -> Restore -> Restore. +// +// This pass also ensures that the safe points are 3) cheaper than the regular +// entry and exit blocks. +// +// Property #1 is ensured via the use of MachineDominatorTree and +// MachinePostDominatorTree. +// Property #2 is ensured via property #1 and MachineLoopInfo, i.e., both +// points must be in the same loop. +// Property #3 is ensured via the MachineBlockFrequencyInfo. 
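A concrete reading of those three properties, as a hypothetical C++ function annotated with where Save and Restore could legally sit (`heavy` is an assumed helper that clobbers callee-saved registers; none of this is code from the tree):

extern int heavy(int *p); // assumed to use callee-saved registers / stack

int foo(int *p) {
  if (!p)        // entry block touches no CSR or frame index, so this
    return -1;   // early exit never executes a prologue
  // Save goes here: it dominates every frame-related instruction (#1).
  int r = heavy(p);
  // Restore goes here: it post-dominates them and executions alternate
  // Save -> Restore (#2); both blocks run no more often than entry (#3).
  return r;
}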
+// +// If this pass found points matching all these properties, then +// MachineFrameInfo is updated with that information. +//===----------------------------------------------------------------------===// +#include "llvm/ADT/Statistic.h" +// To check for profitability. +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +// For property #1 for Save. +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +// To record the result of the analysis. +#include "llvm/CodeGen/MachineFrameInfo.h" +// For property #2. +#include "llvm/CodeGen/MachineLoopInfo.h" +// For property #1 for Restore. +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/Passes.h" +// To know about callee-saved. +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/Support/Debug.h" +// To query the target about frame lowering. +#include "llvm/Target/TargetFrameLowering.h" +// To know about frame setup operation. +#include "llvm/Target/TargetInstrInfo.h" +// To access TargetInstrInfo. +#include "llvm/Target/TargetSubtargetInfo.h" + +#define DEBUG_TYPE "shrink-wrap" + +using namespace llvm; + +STATISTIC(NumFunc, "Number of functions"); +STATISTIC(NumCandidates, "Number of shrink-wrapping candidates"); +STATISTIC(NumCandidatesDropped, + "Number of shrink-wrapping candidates dropped because of frequency"); + +namespace { +/// \brief Class to determine where the safe points to insert the +/// prologue and epilogue are. +/// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the +/// shrink-wrapping term for prologue/epilogue placement, this pass +/// does not rely on expensive data-flow analysis. Instead we use the +/// dominance properties and loop information to decide which points +/// are safe for such insertion. +class ShrinkWrap : public MachineFunctionPass { + /// Hold callee-saved information. + RegisterClassInfo RCI; + MachineDominatorTree *MDT; + MachinePostDominatorTree *MPDT; + /// Current safe point found for the prologue. + /// The prologue will be inserted before the first instruction + /// in this basic block. + MachineBasicBlock *Save; + /// Current safe point found for the epilogue. + /// The epilogue will be inserted before the first terminator instruction + /// in this basic block. + MachineBasicBlock *Restore; + /// Hold the information of the basic block frequency. + /// Used to check the profitability of the new points. + MachineBlockFrequencyInfo *MBFI; + /// Hold the loop information. Used to determine if Save and Restore + /// are in the same loop. + MachineLoopInfo *MLI; + /// Frequency of the Entry block. + uint64_t EntryFreq; + /// Current opcode for frame setup. + unsigned FrameSetupOpcode; + /// Current opcode for frame destroy. + unsigned FrameDestroyOpcode; + /// Entry block. + const MachineBasicBlock *Entry; + + /// \brief Check if \p MI uses or defines a callee-saved register or + /// a frame index. If this is the case, this means \p MI must happen + /// after Save and before Restore. + bool useOrDefCSROrFI(const MachineInstr &MI) const; + + /// \brief Update the Save and Restore points such that \p MBB is in + /// the region that is dominated by Save and post-dominated by Restore + /// and Save and Restore still match the safe point definition. + /// Such points may not exist and Save and/or Restore may be null after + /// this call. + void updateSaveRestorePoints(MachineBasicBlock &MBB); + + /// \brief Initialize the pass for \p MF. 
+ void init(MachineFunction &MF) { + RCI.runOnMachineFunction(MF); + MDT = &getAnalysis<MachineDominatorTree>(); + MPDT = &getAnalysis<MachinePostDominatorTree>(); + Save = nullptr; + Restore = nullptr; + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + MLI = &getAnalysis<MachineLoopInfo>(); + EntryFreq = MBFI->getEntryFreq(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + Entry = &MF.front(); + + ++NumFunc; + } + + /// Check whether or not Save and Restore points are still interesting for + /// shrink-wrapping. + bool ArePointsInteresting() const { return Save != Entry && Save && Restore; } + +public: + static char ID; + + ShrinkWrap() : MachineFunctionPass(ID) { + initializeShrinkWrapPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachinePostDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const override { + return "Shrink Wrapping analysis"; + } + + /// \brief Perform the shrink-wrapping analysis and update + /// the MachineFrameInfo attached to \p MF with the results. + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // End anonymous namespace. + +char ShrinkWrap::ID = 0; +char &llvm::ShrinkWrapID = ShrinkWrap::ID; + +INITIALIZE_PASS_BEGIN(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, + false) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false) + +bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI) const { + if (MI.getOpcode() == FrameSetupOpcode || + MI.getOpcode() == FrameDestroyOpcode) { + DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); + return true; + } + for (const MachineOperand &MO : MI.operands()) { + bool UseCSR = false; + if (MO.isReg()) { + unsigned PhysReg = MO.getReg(); + if (!PhysReg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && + "Unallocated register?!"); + UseCSR = RCI.getLastCalleeSavedAlias(PhysReg); + } + // TODO: Handle regmask more accurately. + // For now, be conservative about them. + if (UseCSR || MO.isFI() || MO.isRegMask()) { + DEBUG(dbgs() << "Use or define CSR(" << UseCSR << ") or FI(" << MO.isFI() + << "): " << MI << '\n'); + return true; + } + } + return false; +} + +/// \brief Helper function to find the immediate (post) dominator. +template <typename ListOfBBs, typename DominanceAnalysis> +MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, + DominanceAnalysis &Dom) { + MachineBasicBlock *IDom = &Block; + for (MachineBasicBlock *BB : BBs) { + IDom = Dom.findNearestCommonDominator(IDom, BB); + if (!IDom) + break; + } + return IDom; +} + +void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { + // Get rid of the easy cases first. 
+ if (!Save) + Save = &MBB; + else + Save = MDT->findNearestCommonDominator(Save, &MBB); + + if (!Save) { + DEBUG(dbgs() << "Found a block that is not reachable from Entry\n"); + return; + } + + if (!Restore) + Restore = &MBB; + else + Restore = MPDT->findNearestCommonDominator(Restore, &MBB); + + // Make sure we would be able to insert the restore code before the + // terminator. + if (Restore == &MBB) { + for (const MachineInstr &Terminator : MBB.terminators()) { + if (!useOrDefCSROrFI(Terminator)) + continue; + // One of the terminators needs to happen before the restore point. + if (MBB.succ_empty()) { + Restore = nullptr; + break; + } + // Look for a restore point that post-dominates all the successors. + // The immediate post-dominator is what we are looking for. + Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); + break; + } + } + + if (!Restore) { + DEBUG(dbgs() << "Restore point needs to be spanned on several blocks\n"); + return; + } + + // Make sure Save and Restore are suitable for shrink-wrapping: + // 1. all paths from Save need to lead to Restore before exiting. + // 2. all paths to Restore need to go through Save from Entry. + // We achieve that by making sure that: + // A. Save dominates Restore. + // B. Restore post-dominates Save. + // C. Save and Restore are in the same loop. + bool SaveDominatesRestore = false; + bool RestorePostDominatesSave = false; + while (Save && Restore && + (!(SaveDominatesRestore = MDT->dominates(Save, Restore)) || + !(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) || + MLI->getLoopFor(Save) != MLI->getLoopFor(Restore))) { + // Fix (A). + if (!SaveDominatesRestore) { + Save = MDT->findNearestCommonDominator(Save, Restore); + continue; + } + // Fix (B). + if (!RestorePostDominatesSave) + Restore = MPDT->findNearestCommonDominator(Restore, Save); + + // Fix (C). + if (Save && Restore && Save != Restore && + MLI->getLoopFor(Save) != MLI->getLoopFor(Restore)) { + if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) + // Push Save outside of this loop. + Save = FindIDom<>(*Save, Save->predecessors(), *MDT); + else + // Push Restore outside of this loop. + Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); + } + } +} + +bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { + if (MF.empty()) + return false; + DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); + + init(MF); + + for (MachineBasicBlock &MBB : MF) { + DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName() + << '\n'); + + for (const MachineInstr &MI : MBB) { + if (!useOrDefCSROrFI(MI)) + continue; + // Save (resp. restore) point must dominate (resp. post-dominate) + // MI. Look for the proper basic block for those. + updateSaveRestorePoints(MBB); + // If we are at a point where we cannot improve the placement of + // save/restore instructions, just give up. + if (!ArePointsInteresting()) { + DEBUG(dbgs() << "No Shrink wrap candidate found\n"); + return false; + } + // No need to look for other instructions; this basic block + // will already be part of the handled region. + break; + } + } + if (!ArePointsInteresting()) { + // If the points are not interesting at this point, then they must be null + // because it means we did not encounter any frame/CSR related code. + // Otherwise, we would have returned from the previous loop. 
+ assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!"); + DEBUG(dbgs() << "Nothing to shrink-wrap\n"); + return false; + } + + DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq + << '\n'); + + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + do { + DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: " + << Save->getNumber() << ' ' << Save->getName() << ' ' + << MBFI->getBlockFreq(Save).getFrequency() << "\nRestore: " + << Restore->getNumber() << ' ' << Restore->getName() << ' ' + << MBFI->getBlockFreq(Restore).getFrequency() << '\n'); + + bool IsSaveCheap, TargetCanUseSaveAsPrologue = false; + if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) && + EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) && + ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) && + TFI->canUseAsEpilogue(*Restore))) + break; + DEBUG(dbgs() << "New points are too expensive or invalid for the target\n"); + MachineBasicBlock *NewBB; + if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) { + Save = FindIDom<>(*Save, Save->predecessors(), *MDT); + if (!Save) + break; + NewBB = Save; + } else { + // Restore is expensive. + Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); + if (!Restore) + break; + NewBB = Restore; + } + updateSaveRestorePoints(*NewBB); + } while (Save && Restore); + + if (!ArePointsInteresting()) { + ++NumCandidatesDropped; + return false; + } + + DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " << Save->getNumber() + << ' ' << Save->getName() << "\nRestore: " + << Restore->getNumber() << ' ' << Restore->getName() << '\n'); + + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setSavePoint(Save); + MFI->setRestorePoint(Restore); + ++NumCandidates; + return false; +} diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 7fd8107..42d277e 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -46,6 +46,8 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { class SjLjEHPrepare : public FunctionPass { const TargetMachine *TM; + Type *doubleUnderDataTy; + Type *doubleUnderJBufTy; Type *FunctionContextTy; Constant *RegisterFn; Constant *UnregisterFn; @@ -93,12 +95,14 @@ bool SjLjEHPrepare::doInitialization(Module &M) { // builtin_setjmp uses a five word jbuf Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); Type *Int32Ty = Type::getInt32Ty(M.getContext()); - FunctionContextTy = StructType::get(VoidPtrTy, // __prev - Int32Ty, // call_site - ArrayType::get(Int32Ty, 4), // __data - VoidPtrTy, // __personality - VoidPtrTy, // __lsda - ArrayType::get(VoidPtrTy, 5), // __jbuf + doubleUnderDataTy = ArrayType::get(Int32Ty, 4); + doubleUnderJBufTy = ArrayType::get(VoidPtrTy, 5); + FunctionContextTy = StructType::get(VoidPtrTy, // __prev + Int32Ty, // call_site + doubleUnderDataTy, // __data + VoidPtrTy, // __personality + VoidPtrTy, // __lsda + doubleUnderJBufTy, // __jbuf nullptr); RegisterFn = M.getOrInsertFunction( "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()), @@ -128,7 +132,8 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { Value *Zero = ConstantInt::get(Int32Ty, 0); Value *One = ConstantInt::get(Int32Ty, 1); Value *Idxs[2] = { Zero, One }; - Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site"); + Value *CallSite = + Builder.CreateGEP(FunctionContextTy, FuncCtx, Idxs, "call_site"); // Insert a store of 
the call-site number ConstantInt *CallSiteNoC = @@ -191,7 +196,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. - const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); + const TargetLowering *TLI = TM->getSubtargetImpl(F)->getTargetLowering(); unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context", @@ -203,16 +208,17 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); // Reference the __data field. - Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data"); + Value *FCData = + Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 2, "__data"); // The exception values come back in context->__data[0]. - Value *ExceptionAddr = - Builder.CreateConstGEP2_32(FCData, 0, 0, "exception_gep"); + Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData, + 0, 0, "exception_gep"); Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy()); - Value *SelectorAddr = - Builder.CreateConstGEP2_32(FCData, 0, 1, "exn_selector_gep"); + Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData, + 0, 1, "exn_selector_gep"); Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); substituteLPadValues(LPI, ExnVal, SelVal); @@ -222,15 +228,16 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, IRBuilder<> Builder(EntryBB->getTerminator()); if (!PersonalityFn) PersonalityFn = LPads[0]->getPersonalityFn(); - Value *PersonalityFieldPtr = - Builder.CreateConstGEP2_32(FuncCtx, 0, 3, "pers_fn_gep"); + Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32( + FunctionContextTy, FuncCtx, 0, 3, "pers_fn_gep"); Builder.CreateStore( Builder.CreateBitCast(PersonalityFn, Builder.getInt8PtrTy()), PersonalityFieldPtr, /*isVolatile=*/true); // LSDA address - Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); - Value *LSDAFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 4, "lsda_gep"); + Value *LSDA = Builder.CreateCall(LSDAAddrFn, {}, "lsda_addr"); + Value *LSDAFieldPtr = + Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 4, "lsda_gep"); Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true); return FuncCtx; @@ -399,18 +406,21 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { IRBuilder<> Builder(EntryBB->getTerminator()); // Get a reference to the jump buffer. - Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep"); + Value *JBufPtr = + Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 5, "jbuf_gep"); // Save the frame pointer. - Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep"); + Value *FramePtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 0, + "jbuf_fp_gep"); Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp"); Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true); // Save the stack pointer. 
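The GEP calls in this SjLjEHPrepare hunk track LLVM's move to explicitly typed GEPs: IRBuilder::CreateGEP and CreateConstGEP2_32 now take the pointee type as their first argument instead of inferring it from the pointer operand, which is why each call gains FunctionContextTy, doubleUnderDataTy, or doubleUnderJBufTy. A minimal sketch of the new form (the helper itself is illustrative, not part of the patch); the stack-pointer save that follows shows the same before/after pair:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Form a pointer to field 2 (__data) of the function context with the new
// explicitly typed two-index GEP; the old form omitted FuncCtxTy and read the
// element type out of FuncCtx's pointer type instead.
Value *getDataFieldPtr(IRBuilder<> &Builder, StructType *FuncCtxTy,
                       Value *FuncCtx) {
  return Builder.CreateConstGEP2_32(FuncCtxTy, FuncCtx, 0, 2, "__data");
}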
- Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep"); + Value *StackPtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 2, + "jbuf_sp_gep"); - Val = Builder.CreateCall(StackAddrFn, "sp"); + Val = Builder.CreateCall(StackAddrFn, {}, "sp"); Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true); // Call the setjmp intrinsic. It fills in the rest of the jmpbuf. diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp index d46621d..025ae70 100644 --- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp @@ -127,7 +127,7 @@ void SlotIndexes::renumberIndexes() { void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { // Number indexes with half the default spacing so we can catch up quickly. const unsigned Space = SlotIndex::InstrDist/2; - assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM"); + static_assert((Space & 3) == 0, "InstrDist must be a multiple of 2*NUM"); IndexList::iterator startItr = std::prev(curItr); unsigned index = startItr->getIndex(); diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h index 622361e..03dd58d 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.h +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h @@ -70,7 +70,7 @@ public: static char ID; // Pass identification, replacement for typeid. SpillPlacement() : MachineFunctionPass(ID), nodes(nullptr) {} - ~SpillPlacement() { releaseMemory(); } + ~SpillPlacement() override { releaseMemory(); } /// BorderConstraint - A basic block has separate constraints for entry and /// exit. diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp index 4c8801a..dab1dfe 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -623,8 +623,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { AssignI.setMap(RegAssign); for (unsigned i = 0, e = Copies.size(); i != e; ++i) { - VNInfo *VNI = Copies[i]; - SlotIndex Def = VNI->def; + SlotIndex Def = Copies[i]->def; MachineInstr *MI = LIS.getInstructionFromIndex(Def); assert(MI && "No instruction for back-copy"); @@ -635,13 +634,12 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { while (!AtBegin && (--MBBI)->isDebugValue()); DEBUG(dbgs() << "Removing " << Def << '\t' << *MI); - LI->removeValNo(VNI); + LIS.removeVRegDefAt(*LI, Def); LIS.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); - // Adjust RegAssign if a register assignment is killed at VNI->def. We - // want to avoid calculating the live range of the source register if - // possible. + // Adjust RegAssign if a register assignment is killed at Def. We want to + // avoid calculating the live range of the source register if possible. AssignI.find(Def.getPrevSlot()); if (!AssignI.valid() || AssignI.start() >= Def) continue; diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h index 2e60c14..a062763 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm/lib/CodeGen/SplitKit.h @@ -419,7 +419,7 @@ public: /// There may be extra indices created by dead code elimination. void finish(SmallVectorImpl<unsigned> *LRMap = nullptr); - /// dump - print the current interval maping to dbgs(). + /// dump - print the current interval mapping to dbgs(). 
void dump() const; // ===--- High level methods ---=== diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp index faf94b6..3541b33 100644 --- a/contrib/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp @@ -48,7 +48,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -364,7 +363,7 @@ void StackColoring::calculateLocalLiveness() { } } - BBSet = NextBBSet; + BBSet = std::move(NextBBSet); }// while changed. } @@ -464,7 +463,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { continue; if (SlotRemap.count(VI.Slot)) { DEBUG(dbgs() << "Remapping debug info for [" - << DIVariable(VI.Var).getName() << "].\n"); + << cast<DILocalVariable>(VI.Var)->getName() << "].\n"); VI.Slot = SlotRemap[VI.Slot]; FixedDbg++; } diff --git a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp index 767f43a..d88be57 100644 --- a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -14,24 +14,24 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/StackMapLivenessAnalysis.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "stackmaps" -namespace llvm { -cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness", - cl::Hidden, cl::init(true), - cl::desc("Enable PatchPoint Liveness Analysis Pass")); -} +static cl::opt<bool> EnablePatchPointLiveness( + "enable-patchpoint-liveness", cl::Hidden, cl::init(true), + cl::desc("Enable PatchPoint Liveness Analysis Pass")); STATISTIC(NumStackMapFuncVisited, "Number of functions visited"); STATISTIC(NumStackMapFuncSkipped, "Number of functions skipped"); @@ -39,6 +39,46 @@ STATISTIC(NumBBsVisited, "Number of basic blocks visited"); STATISTIC(NumBBsHaveNoStackmap, "Number of basic blocks with no stackmap"); STATISTIC(NumStackMaps, "Number of StackMaps visited"); +namespace { +/// \brief This pass calculates the liveness information for each basic block in +/// a function and attaches the register live-out information to a patchpoint +/// intrinsic if present. +/// +/// This pass can be disabled via the -enable-patchpoint-liveness=false flag. +/// The pass skips functions that don't have any patchpoint intrinsics. The +/// information provided by this pass is optional and not required by the +/// aforementioned intrinsic to function. +class StackMapLiveness : public MachineFunctionPass { + MachineFunction *MF; + const TargetRegisterInfo *TRI; + LivePhysRegs LiveRegs; + +public: + static char ID; + + /// \brief Default construct and initialize the pass. + StackMapLiveness(); + + /// \brief Tell the pass manager which passes we depend on and what + /// information we preserve. 
+ void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// \brief Calculate the liveness information for the given machine function. + bool runOnMachineFunction(MachineFunction &MF) override; + +private: + /// \brief Performs the actual liveness calculation for the function. + bool calculateLiveness(); + + /// \brief Add the current register live set to the instruction. + void addLiveOutSetToMI(MachineInstr &MI); + + /// \brief Create a register mask and initialize it with the registers from + /// the register live set. + uint32_t *createRegisterMask() const; +}; +} // namespace + char StackMapLiveness::ID = 0; char &llvm::StackMapLivenessID = StackMapLiveness::ID; INITIALIZE_PASS(StackMapLiveness, "stackmap-liveness", @@ -60,18 +100,18 @@ void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const { } /// Calculate the liveness information for the given machine function. -bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) { +bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { if (!EnablePatchPointLiveness) return false; - DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " - << _MF.getName() << " **********\n"); - MF = &_MF; - TRI = MF->getSubtarget().getRegisterInfo(); + DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << MF.getName() + << " **********\n"); + this->MF = &MF; + TRI = MF.getSubtarget().getRegisterInfo(); ++NumStackMapFuncVisited; // Skip this function if there are no patchpoints to process. - if (!MF->getFrameInfo()->hasPatchPoint()) { + if (!MF.getFrameInfo()->hasPatchPoint()) { ++NumStackMapFuncSkipped; return false; } diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp index f1d1160..ffe59c1 100644 --- a/contrib/llvm/lib/CodeGen/StackMaps.cpp +++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp @@ -19,8 +19,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -76,21 +74,32 @@ StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { llvm_unreachable("Unsupported stackmap version!"); } +/// Go up the super-register chain until we hit a valid dwarf register number. 
+static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { + int RegNo = TRI->getDwarfRegNum(Reg, false); + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR) + RegNo = TRI->getDwarfRegNum(*SR, false); + + assert(RegNo >= 0 && "Invalid Dwarf register number."); + return (unsigned) RegNo; +} + MachineInstr::const_mop_iterator StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, MachineInstr::const_mop_iterator MOE, LocationVec &Locs, LiveOutVec &LiveOuts) const { + const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo(); if (MOI->isImm()) { switch (MOI->getImm()) { default: llvm_unreachable("Unrecognized operand type."); case StackMaps::DirectMemRefOp: { - unsigned Size = - AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits(); + unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits(); assert((Size % 8) == 0 && "Need pointer size in bytes."); Size /= 8; unsigned Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); - Locs.push_back(Location(StackMaps::Location::Direct, Size, Reg, Imm)); + Locs.push_back(Location(StackMaps::Location::Direct, Size, + getDwarfRegNum(Reg, TRI), Imm)); break; } case StackMaps::IndirectMemRefOp: { @@ -98,7 +107,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, assert(Size > 0 && "Need a valid size for indirect memory locations."); unsigned Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); - Locs.push_back(Location(StackMaps::Location::Indirect, Size, Reg, Imm)); + Locs.push_back(Location(StackMaps::Location::Indirect, Size, + getDwarfRegNum(Reg, TRI), Imm)); break; } case StackMaps::ConstantOp: { @@ -123,12 +133,18 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) && "Virtreg operands should have been rewritten before now."); - const TargetRegisterClass *RC = - AP.TM.getSubtargetImpl()->getRegisterInfo()->getMinimalPhysRegClass( - MOI->getReg()); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg()); assert(!MOI->getSubReg() && "Physical subreg still around."); + + unsigned Offset = 0; + unsigned RegNo = getDwarfRegNum(MOI->getReg(), TRI); + unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false); + unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, MOI->getReg()); + if (SubRegIdx) + Offset = TRI->getSubRegIdxOffset(SubRegIdx); + Locs.push_back( - Location(Location::Register, RC->getSize(), MOI->getReg(), 0)); + Location(Location::Register, RC->getSize(), RegNo, Offset)); return ++MOI; } @@ -138,14 +154,74 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, return ++MOI; } -/// Go up the super-register chain until we hit a valid dwarf register number. -static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { - int RegNo = TRI->getDwarfRegNum(Reg, false); - for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR) - RegNo = TRI->getDwarfRegNum(*SR, false); +void StackMaps::print(raw_ostream &OS) { + const TargetRegisterInfo *TRI = + AP.MF ? 
AP.MF->getSubtarget().getRegisterInfo() : nullptr; + OS << WSMP << "callsites:\n"; + for (const auto &CSI : CSInfos) { + const LocationVec &CSLocs = CSI.Locations; + const LiveOutVec &LiveOuts = CSI.LiveOuts; - assert(RegNo >= 0 && "Invalid Dwarf register number."); - return (unsigned) RegNo; + OS << WSMP << "callsite " << CSI.ID << "\n"; + OS << WSMP << " has " << CSLocs.size() << " locations\n"; + + unsigned OperIdx = 0; + for (const auto &Loc : CSLocs) { + OS << WSMP << " Loc " << OperIdx << ": "; + switch (Loc.LocType) { + case Location::Unprocessed: + OS << "<Unprocessed operand>"; + break; + case Location::Register: + OS << "Register "; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; + break; + case Location::Direct: + OS << "Direct "; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; + if (Loc.Offset) + OS << " + " << Loc.Offset; + break; + case Location::Indirect: + OS << "Indirect "; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; + OS << "+" << Loc.Offset; + break; + case Location::Constant: + OS << "Constant " << Loc.Offset; + break; + case Location::ConstantIndex: + OS << "Constant Index " << Loc.Offset; + break; + } + OS << " [encoding: .byte " << Loc.LocType << ", .byte " << Loc.Size + << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n"; + OperIdx++; + } + + OS << WSMP << " has " << LiveOuts.size() << " live-out registers\n"; + + OperIdx = 0; + for (const auto &LO : LiveOuts) { + OS << WSMP << " LO " << OperIdx << ": "; + if (TRI) + OS << TRI->getName(LO.Reg); + else + OS << LO.Reg; + OS << " [encoding: .short " << LO.RegNo << ", .byte 0, .byte " + << LO.Size << "]\n"; + OperIdx++; + } + } } /// Create a live-out register record for the given register Reg. @@ -161,7 +237,7 @@ StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const { StackMaps::LiveOutVec StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { assert(Mask && "No register mask specified"); - const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo(); LiveOutVec LiveOuts; // Create a LiveOutReg for each bit that is set in the register mask. @@ -197,9 +273,9 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, MachineInstr::const_mop_iterator MOE, bool recordResult) { - MCContext &OutContext = AP.OutStreamer.getContext(); - MCSymbol *MILabel = OutContext.CreateTempSymbol(); - AP.OutStreamer.EmitLabel(MILabel); + MCContext &OutContext = AP.OutStreamer->getContext(); + MCSymbol *MILabel = OutContext.createTempSymbol(); + AP.OutStreamer->EmitLabel(MILabel); LocationVec Locations; LiveOutVec LiveOuts; @@ -294,9 +370,8 @@ void StackMaps::recordStatepoint(const MachineInstr &MI) { // Record all the deopt and gc operands (they're contiguous and run from the // initial index to the end of the operand list) const unsigned StartIdx = opers.getVarIdx(); - recordStackMapOpers(MI, 0xABCDEF00, - MI.operands_begin() + StartIdx, MI.operands_end(), - false); + recordStackMapOpers(MI, opers.getID(), MI.operands_begin() + StartIdx, + MI.operands_end(), false); } /// Emit the stackmap header. 
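The print() routine added above and the emitter below agree on a fixed record layout for each location (.byte Type, .byte Size, .short DwarfRegNum, .int Offset) and each live-out (.short DwarfRegNum, .byte 0, .byte Size). A self-contained little-endian sketch of that serialization, using plain structs as stand-ins for StackMaps::Location and LiveOutReg:

#include <cstdint>
#include <vector>

// Plain stand-ins for StackMaps::Location and StackMaps::LiveOutReg.
struct LocationRec { uint8_t Type; uint8_t Size; uint16_t DwarfRegNum; int32_t Offset; };
struct LiveOutRec { uint16_t DwarfRegNum; uint8_t Size; };

// Append V to Out as Bytes little-endian bytes, least significant first.
void emitLE(std::vector<uint8_t> &Out, uint64_t V, unsigned Bytes) {
  for (unsigned I = 0; I != Bytes; ++I)
    Out.push_back(uint8_t(V >> (8 * I)));
}

// .byte Type, .byte Size, .short DwarfRegNum, .int Offset
void emitLocation(std::vector<uint8_t> &Out, const LocationRec &L) {
  emitLE(Out, L.Type, 1);
  emitLE(Out, L.Size, 1);
  emitLE(Out, L.DwarfRegNum, 2);
  emitLE(Out, uint32_t(L.Offset), 4);
}

// .short DwarfRegNum, .byte 0 (reserved), .byte Size
void emitLiveOut(std::vector<uint8_t> &Out, const LiveOutRec &LO) {
  emitLE(Out, LO.DwarfRegNum, 2);
  emitLE(Out, 0, 1);
  emitLE(Out, LO.Size, 1);
}

Since parseOperand now resolves registers to DWARF numbering when the map is built, the emission loop in the next hunk can stream Loc.Reg and Loc.Offset without further translation.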
@@ -384,16 +459,13 @@ void StackMaps::emitConstantPoolEntries(MCStreamer &OS) { /// 0x3, Indirect, [Reg + Offset] (spilled value) /// 0x4, Constant, Offset (small constant) /// 0x5, ConstIndex, Constants[Offset] (large constant) -void StackMaps::emitCallsiteEntries(MCStreamer &OS, - const TargetRegisterInfo *TRI) { +void StackMaps::emitCallsiteEntries(MCStreamer &OS) { + DEBUG(print(dbgs())); // Callsite entries. - DEBUG(dbgs() << WSMP << "callsites:\n"); for (const auto &CSI : CSInfos) { const LocationVec &CSLocs = CSI.Locations; const LiveOutVec &LiveOuts = CSI.LiveOuts; - DEBUG(dbgs() << WSMP << "callsite " << CSI.ID << "\n"); - // Verify stack map entry. It's better to communicate a problem to the // runtime than crash in case of in-process compilation. Currently, we do // simple overflow checks, but we may eventually communicate other @@ -414,83 +486,20 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS, // Reserved for flags. OS.EmitIntValue(0, 2); - - DEBUG(dbgs() << WSMP << " has " << CSLocs.size() << " locations\n"); - OS.EmitIntValue(CSLocs.size(), 2); - unsigned OperIdx = 0; for (const auto &Loc : CSLocs) { - unsigned RegNo = 0; - int Offset = Loc.Offset; - if(Loc.Reg) { - RegNo = getDwarfRegNum(Loc.Reg, TRI); - - // If this is a register location, put the subregister byte offset in - // the location offset. - if (Loc.LocType == Location::Register) { - assert(!Loc.Offset && "Register location should have zero offset"); - unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false); - unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, Loc.Reg); - if (SubRegIdx) - Offset = TRI->getSubRegIdxOffset(SubRegIdx); - } - } - else { - assert(Loc.LocType != Location::Register && - "Missing location register"); - } - - DEBUG(dbgs() << WSMP << " Loc " << OperIdx << ": "; - switch (Loc.LocType) { - case Location::Unprocessed: - dbgs() << "<Unprocessed operand>"; - break; - case Location::Register: - dbgs() << "Register " << TRI->getName(Loc.Reg); - break; - case Location::Direct: - dbgs() << "Direct " << TRI->getName(Loc.Reg); - if (Loc.Offset) - dbgs() << " + " << Loc.Offset; - break; - case Location::Indirect: - dbgs() << "Indirect " << TRI->getName(Loc.Reg) - << " + " << Loc.Offset; - break; - case Location::Constant: - dbgs() << "Constant " << Loc.Offset; - break; - case Location::ConstantIndex: - dbgs() << "Constant Index " << Loc.Offset; - break; - } - dbgs() << " [encoding: .byte " << Loc.LocType - << ", .byte " << Loc.Size - << ", .short " << RegNo - << ", .int " << Offset << "]\n"; - ); - OS.EmitIntValue(Loc.LocType, 1); OS.EmitIntValue(Loc.Size, 1); - OS.EmitIntValue(RegNo, 2); - OS.EmitIntValue(Offset, 4); - OperIdx++; + OS.EmitIntValue(Loc.Reg, 2); + OS.EmitIntValue(Loc.Offset, 4); } - DEBUG(dbgs() << WSMP << " has " << LiveOuts.size() - << " live-out registers\n"); - // Num live-out registers and padding to align to 4 byte. 
OS.EmitIntValue(0, 2); OS.EmitIntValue(LiveOuts.size(), 2); - OperIdx = 0; for (const auto &LO : LiveOuts) { - DEBUG(dbgs() << WSMP << " LO " << OperIdx << ": " - << TRI->getName(LO.Reg) - << " [encoding: .short " << LO.RegNo - << ", .byte 0, .byte " << LO.Size << "]\n"); OS.EmitIntValue(LO.RegNo, 2); OS.EmitIntValue(0, 1); OS.EmitIntValue(LO.Size, 1); @@ -511,24 +520,23 @@ void StackMaps::serializeToStackMapSection() { if (CSInfos.empty()) return; - MCContext &OutContext = AP.OutStreamer.getContext(); - MCStreamer &OS = AP.OutStreamer; - const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo(); + MCContext &OutContext = AP.OutStreamer->getContext(); + MCStreamer &OS = *AP.OutStreamer; // Create the section. - const MCSection *StackMapSection = - OutContext.getObjectFileInfo()->getStackMapSection(); + MCSection *StackMapSection = + OutContext.getObjectFileInfo()->getStackMapSection(); OS.SwitchSection(StackMapSection); // Emit a dummy symbol to force section inclusion. - OS.EmitLabel(OutContext.GetOrCreateSymbol(Twine("__LLVM_StackMaps"))); + OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps"))); // Serialize data. DEBUG(dbgs() << "********** Stack Map Output **********\n"); emitStackmapHeader(OS); emitFunctionFrameRecords(OS); emitConstantPoolEntries(OS); - emitCallsiteEntries(OS, TRI); + emitCallsiteEntries(OS); OS.AddBlankLine(); // Clean up. diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index a132805..0824d6f 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -88,10 +88,9 @@ bool StackProtector::runOnFunction(Function &Fn) { DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DT = DTWP ? 
&DTWP->getDomTree() : nullptr; - TLI = TM->getSubtargetImpl()->getTargetLowering(); + TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); - Attribute Attr = Fn.getAttributes().getAttribute( - AttributeSet::FunctionIndex, "stack-protector-buffer-size"); + Attribute Attr = Fn.getFnAttribute("stack-protector-buffer-size"); if (Attr.isStringAttribute() && Attr.getValueAsString().getAsInteger(10, SSPBufferSize)) return false; // Invalid integer string @@ -201,15 +200,12 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { bool StackProtector::RequiresStackProtector() { bool Strong = false; bool NeedsProtector = false; - if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectReq)) { + if (F->hasFnAttribute(Attribute::StackProtectReq)) { NeedsProtector = true; Strong = true; // Use the same heuristic as strong to determine SSPLayout - } else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectStrong)) + } else if (F->hasFnAttribute(Attribute::StackProtectStrong)) Strong = true; - else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtect)) + else if (!F->hasFnAttribute(Attribute::StackProtect)) return false; for (const BasicBlock &BB : *F) { @@ -357,8 +353,8 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, IRBuilder<> B(&F->getEntryBlock().front()); AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot"); LoadInst *LI = B.CreateLoad(StackGuardVar, "StackGuard"); - B.CreateCall2(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), LI, - AI); + B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), + {LI, AI}); return SupportsSelectionDAGSP; } @@ -492,7 +488,7 @@ BasicBlock *StackProtector::CreateFailBB() { Constant *StackChkFail = M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context), nullptr); - B.CreateCall(StackChkFail); + B.CreateCall(StackChkFail, {}); } B.CreateUnreachable(); return FailBB; diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp index cc72e5e..a5a175f 100644 --- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -184,10 +184,18 @@ void StackSlotColoring::InitializeSlots() { UsedColors.resize(LastFI); Assignments.resize(LastFI); + typedef std::iterator_traits<LiveStacks::iterator>::value_type Pair; + SmallVector<Pair *, 16> Intervals; + Intervals.reserve(LS->getNumIntervals()); + for (auto &I : *LS) + Intervals.push_back(&I); + std::sort(Intervals.begin(), Intervals.end(), + [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; }); + // Gather all spill slots into a list. 
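The InitializeSlots() change above is about determinism: LiveStacks hashes its intervals, so iterating it directly visits spill slots in an unspecified order. Collecting pointers to the entries and sorting them by frame index makes every compilation walk them identically; the loop that follows consumes the sorted list. The same pattern, sketched with ordinary standard-library stand-ins:

#include <algorithm>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
  // Stand-in for LiveStacks: iteration order over the map is unspecified.
  std::unordered_map<int, std::string> Slots = {{3, "c"}, {1, "a"}, {2, "b"}};

  // Take stable pointers to the entries and order them by key, as
  // InitializeSlots() now does with its spill-slot intervals.
  using Pair = std::pair<const int, std::string>;
  std::vector<Pair *> Ordered;
  Ordered.reserve(Slots.size());
  for (auto &Entry : Slots)
    Ordered.push_back(&Entry);
  std::sort(Ordered.begin(), Ordered.end(),
            [](Pair *L, Pair *R) { return L->first < R->first; });

  // Ordered now visits keys 1, 2, 3 on every run, independent of hashing.
  return 0;
}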
DEBUG(dbgs() << "Spill slot intervals:\n"); - for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) { - LiveInterval &li = i->second; + for (auto *I : Intervals) { + LiveInterval &li = I->second; DEBUG(li.dump()); int FI = TargetRegisterInfo::stackSlot2Index(li.reg); if (MFI->isDeadObjectIndex(FI)) diff --git a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp index 802cf13..95dfd75 100644 --- a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp +++ b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp @@ -12,7 +12,7 @@ // suitable as a default implementation usable with any collector which can // consume the standard stackmap format generated by statepoints, uses the // default addrespace to distinguish between gc managed and non-gc managed -// pointers, and has reasonable relocation semantics. +// pointers, and has reasonable relocation semantics. // //===----------------------------------------------------------------------===// @@ -33,21 +33,22 @@ public: NeededSafePoints = 0; UsesMetadata = false; CustomRoots = false; - CustomSafePoints = false; } Optional<bool> isGCManagedPointer(const Value *V) const override { // Method is only valid on pointer typed values. PointerType *PT = cast<PointerType>(V->getType()); // For the sake of this example GC, we arbitrarily pick addrspace(1) as our // GC managed heap. We know that a pointer into this heap needs to be - // updated and that no other pointer does. + // updated and that no other pointer does. Note that addrspace(1) is used + // only as an example, it has no special meaning, and is not reserved for + // GC usage. return (1 == PT->getAddressSpace()); } }; } -static GCRegistry::Add<StatepointGC> -X("statepoint-example", "an example strategy for statepoint"); +static GCRegistry::Add<StatepointGC> X("statepoint-example", + "an example strategy for statepoint"); namespace llvm { void linkStatepointExampleGC() {} diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index 4377236..23f41c8 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -449,6 +449,9 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg); if (VI != LocalVRMap.end()) { MO.setReg(VI->second); + // Clear any kill flags from this operand. The new register could have + // uses after this one, so kills are not valid here. + MO.setIsKill(false); MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg)); } } @@ -560,8 +563,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // compensate for the duplication. unsigned MaxDuplicateCount; if (TailDuplicateSize.getNumOccurrences() == 0 && - MF.getFunction()->getAttributes(). 
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) + MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) MaxDuplicateCount = 1; else MaxDuplicateCount = TailDuplicateSize; diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index e3f0191..5638324 100644 --- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -14,6 +14,7 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Function.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cstdlib> @@ -22,6 +23,12 @@ using namespace llvm; TargetFrameLowering::~TargetFrameLowering() { } +/// The default implementation just looks at attribute "no-frame-pointer-elim". +bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const { + auto Attr = MF.getFunction()->getFnAttribute("no-frame-pointer-elim"); + return Attr.getValueAsString() == "true"; +} + /// getFrameIndexOffset - Returns the displacement from the frame register to /// the stack frame of the specified index. This is the default implementation /// which is overridden for some targets. diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp index 608b806..92488de 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -142,6 +142,10 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, unsigned SubReg2 = MI->getOperand(Idx2).getSubReg(); bool Reg1IsKill = MI->getOperand(Idx1).isKill(); bool Reg2IsKill = MI->getOperand(Idx2).isKill(); + bool Reg1IsUndef = MI->getOperand(Idx1).isUndef(); + bool Reg2IsUndef = MI->getOperand(Idx2).isUndef(); + bool Reg1IsInternal = MI->getOperand(Idx1).isInternalRead(); + bool Reg2IsInternal = MI->getOperand(Idx2).isInternalRead(); // If destination is tied to either of the commuted source register, then // it must be updated. if (HasDef && Reg0 == Reg1 && @@ -172,6 +176,10 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, MI->getOperand(Idx1).setSubReg(SubReg2); MI->getOperand(Idx2).setIsKill(Reg1IsKill); MI->getOperand(Idx1).setIsKill(Reg2IsKill); + MI->getOperand(Idx2).setIsUndef(Reg1IsUndef); + MI->getOperand(Idx1).setIsUndef(Reg2IsUndef); + MI->getOperand(Idx2).setIsInternalRead(Reg1IsInternal); + MI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal); return MI; } @@ -285,21 +293,20 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI, bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, unsigned SubIdx, unsigned &Size, unsigned &Offset, - const TargetMachine *TM) const { + const MachineFunction &MF) const { if (!SubIdx) { Size = RC->getSize(); Offset = 0; return true; } - unsigned BitSize = - TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxSize(SubIdx); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + unsigned BitSize = TRI->getSubRegIdxSize(SubIdx); // Convert bit size to byte size to be consistent with // MCRegisterClass::getSize(). 
if (BitSize % 8) return false; - int BitOffset = - TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxOffset(SubIdx); + int BitOffset = TRI->getSubRegIdxOffset(SubIdx); if (BitOffset < 0 || BitOffset % 8) return false; @@ -308,7 +315,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); - if (!TM->getSubtargetImpl()->getDataLayout()->isLittleEndian()) { + if (!MF.getTarget().getDataLayout()->isLittleEndian()) { Offset = RC->getSize() - (Offset + Size); } return true; @@ -377,16 +384,13 @@ void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { llvm_unreachable("Not a MachO target"); } -bool TargetInstrInfo:: -canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { +bool TargetInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, + ArrayRef<unsigned> Ops) const { return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]); } -static MachineInstr* foldPatchpoint(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex, +static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, int FrameIndex, const TargetInstrInfo &TII) { unsigned StartIdx = 0; switch (MI->getOpcode()) { @@ -405,9 +409,8 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF, // Return false if any operands requested for folding are not foldable (not // part of the stackmap's live values). - for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end(); - I != E; ++I) { - if (*I < StartIdx) + for (unsigned Op : Ops) { + if (Op < StartIdx) return nullptr; } @@ -427,8 +430,8 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF, // Compute the spill slot size and offset. const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(MO.getReg()); - bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, - SpillOffset, &MF.getTarget()); + bool Valid = + TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, SpillOffset, MF); if (!Valid) report_fatal_error("cannot spill patchpoint subregister operand"); MIB.addImm(StackMaps::IndirectMemRefOp); @@ -448,10 +451,9 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF, /// operand folded, otherwise NULL is returned. The client is responsible for /// removing the old instruction and adding the new one in the instruction /// stream. -MachineInstr* -TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops, - int FI) const { +MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + ArrayRef<unsigned> Ops, + int FI) const { unsigned Flags = 0; for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (MI->getOperand(Ops[i]).isDef()) @@ -517,10 +519,9 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific /// stack slot. 
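Several TargetInstrInfo interfaces in these hunks, including the overload rewritten just below, switch from const SmallVectorImpl<unsigned> & to ArrayRef<unsigned>. An ArrayRef is a non-owning (pointer, length) view that binds to a SmallVector, a C array, or a braced list alike, so callers no longer need to build a SmallVector first. A hedged sketch of why that widens the set of acceptable callers (countOps and its body are illustrative, not part of the patch):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

using namespace llvm;

// ArrayRef<unsigned> is a non-owning (pointer, length) view of the operand
// indices; the body here is a placeholder.
unsigned countOps(ArrayRef<unsigned> Ops) { return Ops.size(); }

void demo() {
  SmallVector<unsigned, 4> FoldOps = {0, 2};
  countOps(FoldOps);          // binds to a SmallVector without copying
  countOps({1});              // or to a braced initializer list
  unsigned Raw[] = {0, 1, 2};
  countOps(Raw);              // or to a plain C array
}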
-MachineInstr* -TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { +MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + ArrayRef<unsigned> Ops, + MachineInstr *LoadMI) const { assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!"); #ifndef NDEBUG for (unsigned i = 0, e = Ops.size(); i != e; ++i) @@ -651,8 +652,8 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const { bool StackGrowsDown = TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; - int FrameSetupOpcode = getCallFrameSetupOpcode(); - int FrameDestroyOpcode = getCallFrameDestroyOpcode(); + unsigned FrameSetupOpcode = getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode(); if (MI->getOpcode() != FrameSetupOpcode && MI->getOpcode() != FrameDestroyOpcode) diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp index 9b2fdff..b7f1db6 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -420,6 +420,14 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { // These are generally not available. Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = nullptr; } + + // For f16/f32 conversions, Darwin uses the standard naming scheme, instead + // of the gnueabi-style __gnu_*_ieee. + // FIXME: What about other targets? + if (TT.isOSDarwin()) { + Names[RTLIB::FPEXT_F16_F32] = "__extendhfsf2"; + Names[RTLIB::FPROUND_F32_F16] = "__truncsfhf2"; + } } /// InitLibcallCallingConvs - Set default libcall CallingConvs. @@ -664,6 +672,44 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { return UNKNOWN_LIBCALL; } +RTLIB::Libcall RTLIB::getATOMIC(unsigned Opc, MVT VT) { +#define OP_TO_LIBCALL(Name, Enum) \ + case Name: \ + switch (VT.SimpleTy) { \ + default: \ + return UNKNOWN_LIBCALL; \ + case MVT::i8: \ + return Enum##_1; \ + case MVT::i16: \ + return Enum##_2; \ + case MVT::i32: \ + return Enum##_4; \ + case MVT::i64: \ + return Enum##_8; \ + case MVT::i128: \ + return Enum##_16; \ + } + + switch (Opc) { + OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET) + OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN) + } + +#undef OP_TO_LIBCALL + + return UNKNOWN_LIBCALL; +} + /// InitCmpLibcallCCs - Set default comparison libcall CC. /// static void InitCmpLibcallCCs(ISD::CondCode *CCs) { @@ -695,12 +741,11 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { } /// NOTE: The TargetMachine owns TLOF. -TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) - : TM(tm), DL(TM.getSubtargetImpl()->getDataLayout()) { +TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { initActions(); // Perform these initializations only once. 
- IsLittleEndian = DL->isLittleEndian(); + IsLittleEndian = getDataLayout()->isLittleEndian(); MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = MaxStoresPerMemmoveOptSize = 4; @@ -765,6 +810,19 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::CONCAT_VECTORS, VT, Expand); setOperationAction(ISD::FMINNUM, VT, Expand); setOperationAction(ISD::FMAXNUM, VT, Expand); + setOperationAction(ISD::FMAD, VT, Expand); + setOperationAction(ISD::SMIN, VT, Expand); + setOperationAction(ISD::SMAX, VT, Expand); + setOperationAction(ISD::UMIN, VT, Expand); + setOperationAction(ISD::UMAX, VT, Expand); + + // Overflow operations default to expand + setOperationAction(ISD::SADDO, VT, Expand); + setOperationAction(ISD::SSUBO, VT, Expand); + setOperationAction(ISD::UADDO, VT, Expand); + setOperationAction(ISD::USUBO, VT, Expand); + setOperationAction(ISD::SMULO, VT, Expand); + setOperationAction(ISD::UMULO, VT, Expand); // These library functions default to expand. setOperationAction(ISD::FROUND, VT, Expand); @@ -791,58 +849,21 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::ConstantFP, MVT::f128, Expand); // These library functions default to expand. - setOperationAction(ISD::FLOG , MVT::f16, Expand); - setOperationAction(ISD::FLOG2, MVT::f16, Expand); - setOperationAction(ISD::FLOG10, MVT::f16, Expand); - setOperationAction(ISD::FEXP , MVT::f16, Expand); - setOperationAction(ISD::FEXP2, MVT::f16, Expand); - setOperationAction(ISD::FFLOOR, MVT::f16, Expand); - setOperationAction(ISD::FMINNUM, MVT::f16, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f16, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); - setOperationAction(ISD::FCEIL, MVT::f16, Expand); - setOperationAction(ISD::FRINT, MVT::f16, Expand); - setOperationAction(ISD::FTRUNC, MVT::f16, Expand); - setOperationAction(ISD::FROUND, MVT::f16, Expand); - setOperationAction(ISD::FLOG , MVT::f32, Expand); - setOperationAction(ISD::FLOG2, MVT::f32, Expand); - setOperationAction(ISD::FLOG10, MVT::f32, Expand); - setOperationAction(ISD::FEXP , MVT::f32, Expand); - setOperationAction(ISD::FEXP2, MVT::f32, Expand); - setOperationAction(ISD::FFLOOR, MVT::f32, Expand); - setOperationAction(ISD::FMINNUM, MVT::f32, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f32, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); - setOperationAction(ISD::FCEIL, MVT::f32, Expand); - setOperationAction(ISD::FRINT, MVT::f32, Expand); - setOperationAction(ISD::FTRUNC, MVT::f32, Expand); - setOperationAction(ISD::FROUND, MVT::f32, Expand); - setOperationAction(ISD::FLOG , MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10, MVT::f64, Expand); - setOperationAction(ISD::FEXP , MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::f64, Expand); - setOperationAction(ISD::FMINNUM, MVT::f64, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); - setOperationAction(ISD::FCEIL, MVT::f64, Expand); - setOperationAction(ISD::FRINT, MVT::f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::f64, Expand); - setOperationAction(ISD::FROUND, MVT::f64, Expand); - setOperationAction(ISD::FLOG , MVT::f128, Expand); - setOperationAction(ISD::FLOG2, MVT::f128, Expand); - setOperationAction(ISD::FLOG10, MVT::f128, Expand); - setOperationAction(ISD::FEXP , MVT::f128, Expand); - 
setOperationAction(ISD::FEXP2, MVT::f128, Expand); - setOperationAction(ISD::FFLOOR, MVT::f128, Expand); - setOperationAction(ISD::FMINNUM, MVT::f128, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f128, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand); - setOperationAction(ISD::FCEIL, MVT::f128, Expand); - setOperationAction(ISD::FRINT, MVT::f128, Expand); - setOperationAction(ISD::FTRUNC, MVT::f128, Expand); - setOperationAction(ISD::FROUND, MVT::f128, Expand); + for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) { + setOperationAction(ISD::FLOG , VT, Expand); + setOperationAction(ISD::FLOG2, VT, Expand); + setOperationAction(ISD::FLOG10, VT, Expand); + setOperationAction(ISD::FEXP , VT, Expand); + setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FFLOOR, VT, Expand); + setOperationAction(ISD::FMINNUM, VT, Expand); + setOperationAction(ISD::FMAXNUM, VT, Expand); + setOperationAction(ISD::FNEARBYINT, VT, Expand); + setOperationAction(ISD::FCEIL, VT, Expand); + setOperationAction(ISD::FRINT, VT, Expand); + setOperationAction(ISD::FTRUNC, VT, Expand); + setOperationAction(ISD::FROUND, VT, Expand); + } // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); @@ -858,7 +879,7 @@ MVT TargetLoweringBase::getPointerTy(uint32_t AS) const { } unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const { - return DL->getPointerSizeInBits(AS); + return getDataLayout()->getPointerSizeInBits(AS); } unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const { @@ -867,7 +888,7 @@ unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const { } MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const { - return MVT::getIntegerVT(8*DL->getPointerSize(0)); + return MVT::getIntegerVT(8 * getDataLayout()->getPointerSize(0)); } EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const { @@ -894,6 +915,138 @@ bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { } } +TargetLoweringBase::LegalizeKind +TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { + // If this is a simple type, use the ComputeRegisterProp mechanism. + if (VT.isSimple()) { + MVT SVT = VT.getSimpleVT(); + assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType)); + MVT NVT = TransformToType[SVT.SimpleTy]; + LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT); + + assert((LA == TypeLegal || LA == TypeSoftenFloat || + ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger) && + "Promote may not follow Expand or Promote"); + + if (LA == TypeSplitVector) + return LegalizeKind(LA, + EVT::getVectorVT(Context, SVT.getVectorElementType(), + SVT.getVectorNumElements() / 2)); + if (LA == TypeScalarizeVector) + return LegalizeKind(LA, SVT.getVectorElementType()); + return LegalizeKind(LA, NVT); + } + + // Handle Extended Scalar Types. + if (!VT.isVector()) { + assert(VT.isInteger() && "Float types must be simple"); + unsigned BitSize = VT.getSizeInBits(); + // First promote to a power-of-two size, then expand if necessary. + if (BitSize < 8 || !isPowerOf2_32(BitSize)) { + EVT NVT = VT.getRoundIntegerType(Context); + assert(NVT != VT && "Unable to round integer VT"); + LegalizeKind NextStep = getTypeConversion(Context, NVT); + // Avoid multi-step promotion. + if (NextStep.first == TypePromoteInteger) + return NextStep; + // Return rounded integer type. 
+ return LegalizeKind(TypePromoteInteger, NVT); + } + + return LegalizeKind(TypeExpandInteger, + EVT::getIntegerVT(Context, VT.getSizeInBits() / 2)); + } + + // Handle vector types. + unsigned NumElts = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + + // Vectors with only one element are always scalarized. + if (NumElts == 1) + return LegalizeKind(TypeScalarizeVector, EltVT); + + // Try to widen vector elements until the element type is a power of two and + // promote it to a legal type later on, for example: + // <3 x i8> -> <4 x i8> -> <4 x i32> + if (EltVT.isInteger()) { + // Vectors with a number of elements that is not a power of two are always + // widened, for example <3 x i8> -> <4 x i8>. + if (!VT.isPow2VectorType()) { + NumElts = (unsigned)NextPowerOf2(NumElts); + EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts); + return LegalizeKind(TypeWidenVector, NVT); + } + + // Examine the element type. + LegalizeKind LK = getTypeConversion(Context, EltVT); + + // If type is to be expanded, split the vector. + // <4 x i140> -> <2 x i140> + if (LK.first == TypeExpandInteger) + return LegalizeKind(TypeSplitVector, + EVT::getVectorVT(Context, EltVT, NumElts / 2)); + + // Promote the integer element types until a legal vector type is found + // or until the element integer type is too big. If a legal type was not + // found, fallback to the usual mechanism of widening/splitting the + // vector. + EVT OldEltVT = EltVT; + while (1) { + // Increase the bitwidth of the element to the next pow-of-two + // (which is greater than 8 bits). + EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits()) + .getRoundIntegerType(Context); + + // Stop trying when getting a non-simple element type. + // Note that vector elements may be greater than legal vector element + // types. Example: X86 XMM registers hold 64bit element on 32bit + // systems. + if (!EltVT.isSimple()) + break; + + // Build a new vector type and check if it is legal. + MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); + // Found a legal promoted vector type. + if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal) + return LegalizeKind(TypePromoteInteger, + EVT::getVectorVT(Context, EltVT, NumElts)); + } + + // Reset the type to the unexpanded type if we did not find a legal vector + // type with a promoted vector element type. + EltVT = OldEltVT; + } + + // Try to widen the vector until a legal type is found. + // If there is no wider legal type, split the vector. + while (1) { + // Round up to the next power of 2. + NumElts = (unsigned)NextPowerOf2(NumElts); + + // If there is no simple vector type with this many elements then there + // cannot be a larger legal vector type. Note that this assumes that + // there are no skipped intermediate vector types in the simple types. + if (!EltVT.isSimple()) + break; + MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); + if (LargerVector == MVT()) + break; + + // If this type is legal then widen the vector. + if (ValueTypeActions.getTypeAction(LargerVector) == TypeLegal) + return LegalizeKind(TypeWidenVector, LargerVector); + } + + // Widen odd vectors to next power of two. + if (!VT.isPow2VectorType()) { + EVT NVT = VT.getPow2VectorType(Context); + return LegalizeKind(TypeWidenVector, NVT); + } + + // Vectors with illegal element types are expanded. 
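Taken together, these getTypeConversion hunks implement a small legalization ladder: scalar integers are rounded up to a power-of-two width (promote) and then halved once no wider legal type exists (expand); vectors are widened to a power-of-two element count, their elements promoted, and, as the return just below shows, split in half when all else fails (e.g. <3 x i8> -> <4 x i8> -> <4 x i32>). A simplified standalone model of the scalar half, assuming an illustrative set of legal widths rather than a real target's:

#include <cstdio>
#include <set>

// Round a bit width up to the next power of two, minimum 8.
unsigned roundIntegerBits(unsigned Bits) {
  unsigned R = 8;
  while (R < Bits)
    R *= 2;
  return R;
}

// Print the promote/expand steps an illegal iN would take, mimicking the
// scalar path of getTypeConversion. The legal widths here are made up.
void legalizeScalar(unsigned Bits) {
  const std::set<unsigned> Legal = {8, 16, 32, 64};
  while (!Legal.count(Bits)) {
    unsigned Rounded = roundIntegerBits(Bits);
    if (Rounded != Bits) {
      std::printf("i%u: promote to i%u\n", Bits, Rounded); // TypePromoteInteger
      Bits = Rounded;
    } else {
      std::printf("i%u: expand to 2 x i%u\n", Bits, Bits / 2); // TypeExpandInteger
      Bits /= 2;
    }
  }
  std::printf("i%u is legal\n", Bits);
}

int main() { legalizeScalar(33); } // i33 -> i64 (promote), which is legal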
+ EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorNumElements() / 2); + return LegalizeKind(TypeSplitVector, NVT); +} static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, unsigned &NumIntermediates, @@ -997,8 +1150,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, } MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(FI), Flags, - TM.getSubtargetImpl()->getDataLayout()->getPointerSize(), - MFI.getObjectAlignment(FI)); + TM.getDataLayout()->getPointerSize(), MFI.getObjectAlignment(FI)); MIB->addMemOperand(MF, MMO); // Replace the instruction and update the operand index. @@ -1012,10 +1164,13 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, /// findRepresentativeClass - Return the largest legal super-reg register class /// of the register class for the specified type and its associated "cost". -std::pair<const TargetRegisterClass*, uint8_t> -TargetLoweringBase::findRepresentativeClass(MVT VT) const { - const TargetRegisterInfo *TRI = - getTargetMachine().getSubtargetImpl()->getRegisterInfo(); +// This function is in TargetLowering because it uses RegClassForVT which would +// need to be moved to TargetRegisterInfo and would necessitate moving +// isTypeLegal over as well - a massive change that would just require +// TargetLowering having a TargetRegisterInfo class member that it would use. +std::pair<const TargetRegisterClass *, uint8_t> +TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI, + MVT VT) const { const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; if (!RC) return std::make_pair(RC, 0); @@ -1041,7 +1196,8 @@ TargetLoweringBase::findRepresentativeClass(MVT VT) const { /// computeRegisterProperties - Once all of the register classes are added, /// this allows us to compute derived properties we expose. -void TargetLoweringBase::computeRegisterProperties() { +void TargetLoweringBase::computeRegisterProperties( + const TargetRegisterInfo *TRI) { static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE, "Too many value types for ValueTypeActions to hold!"); @@ -1110,27 +1266,29 @@ void TargetLoweringBase::computeRegisterProperties() { ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); } - // Decide how to handle f32. If the target does not have native support for - // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. + // Decide how to handle f32. If the target does not have native f32 support, + // expand it to i32 and we will be generating soft float library calls. 
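The computeRegisterProperties hunk that follows encodes two new policies: an illegal f32 is softened to i32 (soft-float libcalls) instead of being promoted to f64, and an illegal f16 is promoted to f32 where possible, softening to i16 only when f32 is also unsupported; on Darwin the resulting f16<->f32 conversions then use the __extendhfsf2/__truncsfhf2 libcall names registered earlier in InitLibcallNames. A compact model of the decision, with invented names in place of the LegalizeTypeAction machinery:

// Invented names; the real code records LegalizeTypeAction values.
enum class FloatAction { Legal, SoftenToInt, PromoteToF32 };

FloatAction classifyF32(bool F32Legal) {
  // New rule: no promotion to f64; an unsupported f32 is done in i32 libcalls.
  return F32Legal ? FloatAction::Legal : FloatAction::SoftenToInt;
}

FloatAction classifyF16(bool F16Legal, bool F32Legal) {
  if (F16Legal)
    return FloatAction::Legal;
  // New rule: do f16 arithmetic in f32 (TypePromoteFloat) when f32 is legal;
  // only soften to i16 when f32 is unsupported as well.
  return F32Legal ? FloatAction::PromoteToF32 : FloatAction::SoftenToInt;
}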
if (!isTypeLegal(MVT::f32)) { - if (isTypeLegal(MVT::f64)) { - NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; - RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; - TransformToType[MVT::f32] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger); - } else { - NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; - RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; - TransformToType[MVT::f32] = MVT::i32; - ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); - } + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; + TransformToType[MVT::f32] = MVT::i32; + ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); } if (!isTypeLegal(MVT::f16)) { - NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; - RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; - TransformToType[MVT::f16] = MVT::i16; - ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat); + // If the target has native f32 support, promote f16 operations to f32. If + // f32 is not supported, generate soft float library calls. + if (isTypeLegal(MVT::f32)) { + NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; + RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; + TransformToType[MVT::f16] = MVT::f32; + ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); + } else { + NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; + RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; + TransformToType[MVT::f16] = MVT::i16; + ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat); + } } // Loop over all of the vector value types to see which need transformations. @@ -1223,7 +1381,7 @@ void TargetLoweringBase::computeRegisterProperties() { for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { const TargetRegisterClass* RRC; uint8_t Cost; - std::tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i); + std::tie(RRC, Cost) = findRepresentativeClass(TRI, (MVT::SimpleValueType)i); RepRegClassForVT[i] = RRC; RepRegClassCostForVT[i] = Cost; } @@ -1366,7 +1524,7 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. 
unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const { - return DL->getABITypeAlignment(Ty); + return getDataLayout()->getABITypeAlignment(Ty); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 9f1e06b..a32bdf8 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -31,6 +31,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -50,7 +51,7 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( MachineModuleInfo *MMI) const { unsigned Encoding = getPersonalityEncoding(); if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect) - return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + + return getContext().getOrCreateSymbol(StringRef("DW.ref.") + TM.getSymbol(GV, Mang)->getName()); if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr) return TM.getSymbol(GV, Mang); @@ -62,21 +63,17 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, const MCSymbol *Sym) const { SmallString<64> NameData("DW.ref."); NameData += Sym->getName(); - MCSymbol *Label = getContext().GetOrCreateSymbol(NameData); + MCSymbol *Label = getContext().getOrCreateSymbol(NameData); Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); Streamer.EmitSymbolAttribute(Label, MCSA_Weak); StringRef Prefix = ".data."; NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end()); unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; - const MCSection *Sec = getContext().getELFSection(NameData, - ELF::SHT_PROGBITS, - Flags, - SectionKind::getDataRel(), - 0, Label->getName()); - unsigned Size = TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); + MCSection *Sec = getContext().getELFSection(NameData, ELF::SHT_PROGBITS, + Flags, 0, Label->getName()); + unsigned Size = TM.getDataLayout()->getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment( - TM.getSubtargetImpl()->getDataLayout()->getPointerABIAlignment()); + Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::Create(Size, getContext()); Streamer.EmitELFSize(Label, E); @@ -166,9 +163,7 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) { return ELF::SHT_PROGBITS; } - -static unsigned -getELFSectionFlags(SectionKind K) { +static unsigned getELFSectionFlags(SectionKind K) { unsigned Flags = 0; if (!K.isMetadata()) @@ -183,9 +178,7 @@ getELFSectionFlags(SectionKind K) { if (K.isThreadLocal()) Flags |= ELF::SHF_TLS; - // K.isMergeableConst() is left out to honour PR4650 - if (K.isMergeableCString() || K.isMergeableConst4() || - K.isMergeableConst8() || K.isMergeableConst16()) + if (K.isMergeableCString() || K.isMergeableConst()) Flags |= ELF::SHF_MERGE; if (K.isMergeableCString()) @@ -206,7 +199,7 @@ static const Comdat *getELFComdat(const GlobalValue *GV) { return C; } -const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( +MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { StringRef SectionName = GV->getSection(); @@ 
-222,126 +215,141 @@ const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( } return getContext().getELFSection(SectionName, getELFSectionType(SectionName, Kind), Flags, - Kind, /*EntrySize=*/0, Group); + /*EntrySize=*/0, Group); } -/// getSectionPrefixForGlobal - Return the section prefix name used by options -FunctionsSections and DataSections. +/// Return the section prefix name used by options FunctionSections and +DataSections. static StringRef getSectionPrefixForGlobal(SectionKind Kind) { - if (Kind.isText()) return ".text."; - if (Kind.isReadOnly()) return ".rodata."; - if (Kind.isBSS()) return ".bss."; - - if (Kind.isThreadData()) return ".tdata."; - if (Kind.isThreadBSS()) return ".tbss."; - - if (Kind.isDataNoRel()) return ".data."; - if (Kind.isDataRelLocal()) return ".data.rel.local."; - if (Kind.isDataRel()) return ".data.rel."; - if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local."; - + if (Kind.isText()) + return ".text"; + if (Kind.isReadOnly()) + return ".rodata"; + if (Kind.isBSS()) + return ".bss"; + if (Kind.isThreadData()) + return ".tdata"; + if (Kind.isThreadBSS()) + return ".tbss"; + if (Kind.isDataNoRel()) + return ".data"; + if (Kind.isDataRelLocal()) + return ".data.rel.local"; + if (Kind.isDataRel()) + return ".data.rel"; + if (Kind.isReadOnlyWithRelLocal()) + return ".data.rel.ro.local"; assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return ".data.rel.ro."; + return ".data.rel.ro"; } -const MCSection *TargetLoweringObjectFileELF:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler &Mang, const TargetMachine &TM) const { - // If we have -ffunction-section or -fdata-section then we should emit the - // global value to a uniqued section specifically for it. - bool EmitUniquedSection; - if (Kind.isText()) - EmitUniquedSection = TM.getFunctionSections(); - else - EmitUniquedSection = TM.getDataSections(); - - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now.
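For illustration: with -ffunction-sections, a function named foo (a hypothetical, already-mangled name) lands in ".text.foo", i.e. the kind prefix above joined to the symbol name. A minimal standalone sketch of the naming scheme, not part of the patch:

    #include <string>

    // Hypothetical helper mirroring the uniqued-section naming used here.
    std::string uniquedSectionName(const std::string &Prefix,
                                   const std::string &MangledName) {
      return Prefix + "." + MangledName; // ".text" + "foo" -> ".text.foo"
    }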
- if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) && - !Kind.isCommon()) { - StringRef Prefix = getSectionPrefixForGlobal(Kind); - - SmallString<128> Name(Prefix); - TM.getNameWithPrefix(Name, GV, Mang, true); - - StringRef Group = ""; - unsigned Flags = getELFSectionFlags(Kind); - if (GV->isWeakForLinker() || GV->hasComdat()) { - if (const Comdat *C = getELFComdat(GV)) - Group = C->getName(); - else - Group = Name.substr(Prefix.size()); - Flags |= ELF::SHF_GROUP; +static MCSectionELF * +selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM, bool EmitUniqueSection, + unsigned Flags, unsigned *NextUniqueID) { + unsigned EntrySize = 0; + if (Kind.isMergeableCString()) { + if (Kind.isMergeable2ByteCString()) { + EntrySize = 2; + } else if (Kind.isMergeable4ByteCString()) { + EntrySize = 4; + } else { + EntrySize = 1; + assert(Kind.isMergeable1ByteCString() && "unknown string width"); + } + } else if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4()) { + EntrySize = 4; + } else if (Kind.isMergeableConst8()) { + EntrySize = 8; + } else { + assert(Kind.isMergeableConst16() && "unknown data width"); + EntrySize = 16; } - - return getContext().getELFSection(Name.str(), - getELFSectionType(Name.str(), Kind), - Flags, Kind, 0, Group); } - if (Kind.isText()) return TextSection; - - if (Kind.isMergeable1ByteCString() || - Kind.isMergeable2ByteCString() || - Kind.isMergeable4ByteCString()) { + StringRef Group = ""; + if (const Comdat *C = getELFComdat(GV)) { + Flags |= ELF::SHF_GROUP; + Group = C->getName(); + } + bool UniqueSectionNames = TM.getUniqueSectionNames(); + SmallString<128> Name; + if (Kind.isMergeableCString()) { // We also need alignment here. // FIXME: this is getting the alignment of the character, not the // alignment of the global! 
unsigned Align = - TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment( - cast<GlobalVariable>(GV)); - - const char *SizeSpec = ".rodata.str1."; - if (Kind.isMergeable2ByteCString()) - SizeSpec = ".rodata.str2."; - else if (Kind.isMergeable4ByteCString()) - SizeSpec = ".rodata.str4."; - else - assert(Kind.isMergeable1ByteCString() && "unknown string width"); - + TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)); - std::string Name = SizeSpec + utostr(Align); - return getContext().getELFSection(Name, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | - ELF::SHF_MERGE | - ELF::SHF_STRINGS, - Kind); + std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; + Name = SizeSpec + utostr(Align); + } else if (Kind.isMergeableConst()) { + Name = ".rodata.cst"; + Name += utostr(EntrySize); + } else { + Name = getSectionPrefixForGlobal(Kind); } - if (Kind.isMergeableConst()) { - if (Kind.isMergeableConst4() && MergeableConst4Section) - return MergeableConst4Section; - if (Kind.isMergeableConst8() && MergeableConst8Section) - return MergeableConst8Section; - if (Kind.isMergeableConst16() && MergeableConst16Section) - return MergeableConst16Section; - return ReadOnlySection; // .const + if (EmitUniqueSection && UniqueSectionNames) { + Name.push_back('.'); + TM.getNameWithPrefix(Name, GV, Mang, true); + } + unsigned UniqueID = ~0; + if (EmitUniqueSection && !UniqueSectionNames) { + UniqueID = *NextUniqueID; + (*NextUniqueID)++; } + return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags, + EntrySize, Group, UniqueID); +} - if (Kind.isReadOnly()) return ReadOnlySection; +MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + unsigned Flags = getELFSectionFlags(Kind); - if (Kind.isThreadData()) return TLSDataSection; - if (Kind.isThreadBSS()) return TLSBSSSection; + // If we have -ffunction-sections or -fdata-sections then we should emit the + // global value to a uniqued section specifically for it. + bool EmitUniqueSection = false; + if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) { + if (Kind.isText()) + EmitUniqueSection = TM.getFunctionSections(); + else + EmitUniqueSection = TM.getDataSections(); + } + EmitUniqueSection |= GV->hasComdat(); - // Note: we claim that common symbols are put in BSSSection, but they are - // really emitted with the magic .comm directive, which creates a symbol table - // entry but not a section. - if (Kind.isBSS() || Kind.isCommon()) return BSSSection; + return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM, + EmitUniqueSection, Flags, &NextUniqueID); +} - if (Kind.isDataNoRel()) return DataSection; - if (Kind.isDataRelLocal()) return DataRelLocalSection; - if (Kind.isDataRel()) return DataRelSection; - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; +MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( + const Function &F, Mangler &Mang, const TargetMachine &TM) const { + // If the function can be removed, produce a unique section so that + // the table doesn't prevent the removal.
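For example, a function foo (hypothetical) built with -ffunction-sections keeps its body in ".text.foo"; giving its jump table a matching uniqued read-only section lets the linker discard the table together with the function, e.g. under --gc-sections or COMDAT folding. A rough paraphrase of the decision made just below, under those assumptions:

    // The table must be discardable together with its function.
    bool WantsUniqueJumpTableSection = TM.getFunctionSections() || F.hasComdat();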
+ const Comdat *C = F.getComdat(); + bool EmitUniqueSection = TM.getFunctionSections() || C; + if (!EmitUniqueSection) + return ReadOnlySection; - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return DataRelROSection; + return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(), + Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC, + &NextUniqueID); +} + +bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection( + bool UsesLabelDifference, const Function &F) const { + // We can always create relative relocations, so use another section + // that can be marked non-executable. + return false; } -/// getSectionForConstant - Given a mergeable constant with the -/// specified size and relocation information, return a section that it -/// should be placed in. -const MCSection * +/// Given a mergeable constant with the specified size and relocation +/// information, return a section that it should be placed in. +MCSection * TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind, const Constant *C) const { if (Kind.isMergeableConst4() && MergeableConst4Section) @@ -358,15 +366,12 @@ TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind, return DataRelROSection; } -static const MCSectionELF *getStaticStructorSection(MCContext &Ctx, - bool UseInitArray, - bool IsCtor, - unsigned Priority, - const MCSymbol *KeySym) { +static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray, + bool IsCtor, unsigned Priority, + const MCSymbol *KeySym) { std::string Name; unsigned Type; unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE; - SectionKind Kind = SectionKind::getDataRel(); StringRef COMDAT = KeySym ? KeySym->getName() : ""; if (KeySym) @@ -398,16 +403,16 @@ static const MCSectionELF *getStaticStructorSection(MCContext &Ctx, Type = ELF::SHT_PROGBITS; } - return Ctx.getELFSection(Name, Type, Flags, Kind, 0, COMDAT); + return Ctx.getELFSection(Name, Type, Flags, 0, COMDAT); } -const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( +MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getStaticStructorSection(getContext(), UseInitArray, true, Priority, KeySym); } -const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( +MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getStaticStructorSection(getContext(), UseInitArray, false, Priority, KeySym); @@ -419,22 +424,21 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { if (!UseInitArray) return; - StaticCtorSection = - getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, - ELF::SHF_WRITE | - ELF::SHF_ALLOC, - SectionKind::getDataRel()); - StaticDtorSection = - getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, - ELF::SHF_WRITE | - ELF::SHF_ALLOC, - SectionKind::getDataRel()); + StaticCtorSection = getContext().getELFSection( + ".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC); + StaticDtorSection = getContext().getELFSection( + ".fini_array", ELF::SHT_FINI_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC); } //===----------------------------------------------------------------------===// // MachO //===----------------------------------------------------------------------===// +TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() + : TargetLoweringObjectFile() { + SupportIndirectSymViaGOTPCRel = true; +} + /// getDepLibFromLinkerOpt - Extract the dependent 
library name from a linker /// option string. Returns StringRef() if the option does not specify a library. StringRef TargetLoweringObjectFileMachO:: @@ -511,12 +515,11 @@ emitModuleFlags(MCStreamer &Streamer, ErrorCode + "."); // Get the section. - const MCSectionMachO *S = - getContext().getMachOSection(Segment, Section, TAA, StubSize, - SectionKind::getDataNoRel()); + MCSectionMachO *S = getContext().getMachOSection( + Segment, Section, TAA, StubSize, SectionKind::getDataNoRel()); Streamer.SwitchSection(S); Streamer.EmitLabel(getContext(). - GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); + getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); Streamer.EmitIntValue(VersionVal, 4); Streamer.EmitIntValue(ImageInfoFlags, 4); Streamer.AddBlankLine(); @@ -531,7 +534,7 @@ static void checkMachOComdat(const GlobalValue *GV) { "' cannot be lowered."); } -const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( +MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { // Parse the section specifier and create it if valid. @@ -552,8 +555,8 @@ const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( } // Get the section. - const MCSectionMachO *S = - getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); + MCSectionMachO *S = + getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); // If TAA wasn't set by ParseSectionSpecifier() above, // use the value returned by getMachOSection() as a default. @@ -573,9 +576,9 @@ const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( return S; } -const MCSection *TargetLoweringObjectFileMachO:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler &Mang, const TargetMachine &TM) const { +MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { checkMachOComdat(GV); // Handle thread local data. @@ -595,16 +598,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // FIXME: Alignment check should be handled by section classifier. if (Kind.isMergeable1ByteCString() && - TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment( - cast<GlobalVariable>(GV)) < 32) + TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) return CStringSection; // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. 
if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && - TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment( - cast<GlobalVariable>(GV)) < 32) + TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) return UStringSection; // With MachO only variables whose corresponding symbol starts with 'l' or @@ -642,7 +643,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, return DataSection; } -const MCSection * +MCSection * TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind, const Constant *C) const { // If this constant requires a relocation, we have to put it in the data @@ -711,6 +712,66 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol( return SSym; } +const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, + MachineModuleInfo *MMI, MCStreamer &Streamer) const { + // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation + // as 64-bit do, we replace the GOT equivalent by accessing the final symbol + // through a non_lazy_ptr stub instead. One advantage is that it allows the + // computation of deltas to final external symbols. Example: + // + // _extgotequiv: + // .long _extfoo + // + // _delta: + // .long _extgotequiv-_delta + // + // is transformed to: + // + // _delta: + // .long L_extfoo$non_lazy_ptr-(_delta+0) + // + // .section __IMPORT,__pointers,non_lazy_symbol_pointers + // L_extfoo$non_lazy_ptr: + // .indirect_symbol _extfoo + // .long 0 + // + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + MCContext &Ctx = getContext(); + + // The offset must consider the original displacement from the base symbol + // since 32-bit targets don't have a GOTPCREL to fold the PC displacement. + Offset = -MV.getConstant(); + const MCSymbol *BaseSym = &MV.getSymB()->getSymbol(); + + // Access the final symbol via sym$non_lazy_ptr and generate the appropriate + // non_lazy_ptr stubs.
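Concretely, reusing the names from the example above: for _extfoo the stub symbol built below becomes "L_extfoo$non_lazy_ptr" (the data layout's private-global prefix, "L" on Darwin, plus the symbol name plus the suffix), and for a non-zero addend the final expression is

    L_extfoo$non_lazy_ptr - (_delta + Offset)

while a zero addend yields the simpler L_extfoo$non_lazy_ptr - _delta.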
+ SmallString<128> Name; + StringRef Suffix = "$non_lazy_ptr"; + Name += DL->getPrivateGlobalPrefix(); + Name += Sym->getName(); + Name += Suffix; + MCSymbol *Stub = Ctx.getOrCreateSymbol(Name); + + MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub); + if (!StubSym.getPointer()) + StubSym = MachineModuleInfoImpl:: + StubValueTy(const_cast<MCSymbol *>(Sym), true /* access indirectly */); + + const MCExpr *BSymExpr = + MCSymbolRefExpr::Create(BaseSym, MCSymbolRefExpr::VK_None, Ctx); + const MCExpr *LHS = + MCSymbolRefExpr::Create(Stub, MCSymbolRefExpr::VK_None, Ctx); + + if (!Offset) + return MCBinaryExpr::CreateSub(LHS, BSymExpr, Ctx); + + const MCExpr *RHS = + MCBinaryExpr::CreateAdd(BSymExpr, MCConstantExpr::Create(Offset, Ctx), Ctx); + return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); +} + //===----------------------------------------------------------------------===// // COFF //===----------------------------------------------------------------------===// @@ -794,14 +855,14 @@ static int getSelectionForCOFF(const GlobalValue *GV) { return 0; } -const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( +MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { int Selection = 0; unsigned Characteristics = getCOFFSectionFlags(Kind); StringRef Name = GV->getSection(); StringRef COMDATSymName = ""; - if ((GV->isWeakForLinker() || GV->hasComdat()) && !Kind.isCommon()) { + if (GV->hasComdat()) { Selection = getSelectionForCOFF(GV); const GlobalValue *ComdatGV; if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) @@ -836,10 +897,9 @@ static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) { return ".data"; } - -const MCSection *TargetLoweringObjectFileCOFF:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler &Mang, const TargetMachine &TM) const { +MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { // If we have -ffunction-sections then we should emit the global value to a // uniqued section specifically for it. bool EmitUniquedSection; @@ -848,12 +908,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, else EmitUniquedSection = TM.getDataSections(); - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now. - // Section names depend on the name of the symbol which is not feasible if the - // symbol has private linkage. 
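Unlike ELF, COFF does not splice the symbol into the section name; a uniqued global keeps a fixed section name per kind and is distinguished by a COMDAT symbol plus a selection mode. A rough sketch of the resulting key, with a hypothetical mangled name:

    // Illustrative only: the triple that identifies a uniqued COFF section.
    struct COFFUniqueKey {
      const char *SectionName; // fixed per kind, e.g. ".text"
      const char *COMDATSym;   // e.g. "?foo@@YAXXZ" (hypothetical)
      unsigned Selection;      // e.g. COFF::IMAGE_COMDAT_SELECT_ANY
    };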
- if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) && - !Kind.isCommon()) { + if ((EmitUniquedSection && !Kind.isCommon()) || GV->hasComdat()) { const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); unsigned Characteristics = getCOFFSectionFlags(Kind); @@ -872,6 +927,11 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, StringRef COMDATSymName = Sym->getName(); return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, Selection); + } else { + SmallString<256> TmpData; + getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true, Mang, TM); + return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData, + Selection); } } @@ -893,6 +953,42 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, return DataSection; } +void TargetLoweringObjectFileCOFF::getNameWithPrefix( + SmallVectorImpl<char> &OutName, const GlobalValue *GV, + bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const { + if (GV->hasPrivateLinkage() && + ((isa<Function>(GV) && TM.getFunctionSections()) || + (isa<GlobalVariable>(GV) && TM.getDataSections()))) + CannotUsePrivateLabel = true; + + Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); +} + +MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( + const Function &F, Mangler &Mang, const TargetMachine &TM) const { + // If the function can be removed, produce a unique section so that + // the table doesn't prevent the removal. + const Comdat *C = F.getComdat(); + bool EmitUniqueSection = TM.getFunctionSections() || C; + if (!EmitUniqueSection) + return ReadOnlySection; + + // FIXME: we should produce a symbol for F instead. + if (F.hasPrivateLinkage()) + return ReadOnlySection; + + MCSymbol *Sym = TM.getSymbol(&F, Mang); + StringRef COMDATSymName = Sym->getName(); + + SectionKind Kind = SectionKind::getReadOnly(); + const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); + unsigned Characteristics = getCOFFSectionFlags(Kind); + Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; + + return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, + COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE); +} + StringRef TargetLoweringObjectFileCOFF:: getDepLibFromLinkerOpt(StringRef LinkerOption) const { const char *LibCmd = "/DEFAULTLIB:"; @@ -923,38 +1019,27 @@ emitModuleFlags(MCStreamer &Streamer, // Emit the linker options to the linker .drectve section. According to the // spec, this section is a space-separated string containing flags for linker. - const MCSection *Sec = getDrectveSection(); + MCSection *Sec = getDrectveSection(); Streamer.SwitchSection(Sec); for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i)); for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii)); - StringRef Op = MDOption->getString(); // Lead with a space for consistency with our dllexport implementation. - std::string Escaped(" "); - if (!Op.startswith("\"") && (Op.find(" ") != StringRef::npos)) { - // The PE-COFF spec says args with spaces must be quoted. 
It doesn't say - // how to escape quotes, but it probably uses this algorithm: - // http://msdn.microsoft.com/en-us/library/17w5ykft(v=vs.85).aspx - // FIXME: Reuse escaping code from Support/Windows/Program.inc - Escaped.push_back('\"'); - Escaped.append(Op); - Escaped.push_back('\"'); - } else { - Escaped.append(Op); - } - Streamer.EmitBytes(Escaped); + std::string Directive(" "); + Directive.append(MDOption->getString()); + Streamer.EmitBytes(Directive); } } } -const MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( +MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getContext().getAssociativeCOFFSection( cast<MCSectionCOFF>(StaticCtorSection), KeySym); } -const MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( +MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getContext().getAssociativeCOFFSection( cast<MCSectionCOFF>(StaticDtorSection), KeySym); diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp index 618d903..f4926cb 100644 --- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -12,23 +12,26 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; /// DisableFramePointerElim - This returns true if frame pointer elimination /// optimization should be disabled for the given machine function. bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { - // Check to see if we should eliminate non-leaf frame pointers and then - // check to see if we should eliminate all frame pointers. - if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf") && - !NoFramePointerElim) { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->hasCalls(); - } + // Check to see if we should eliminate all frame pointers. + if (MF.getSubtarget().getFrameLowering()->noFramePointerElim(MF)) + return true; - return NoFramePointerElim; + // Check to see if we should eliminate non-leaf frame pointers. + if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf")) + return MF.getFrameInfo()->hasCalls(); + + return false; } /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option @@ -51,10 +54,3 @@ bool TargetOptions::HonorSignDependentRoundingFPMath() const { StringRef TargetOptions::getTrapFunctionName() const { return TrapFuncName; } - -/// getCFIFuncName - If this returns a non-empty string, then it is the name of -/// the function that gets called on CFI violations in CFI non-enforcing mode -/// (!TargetOptions::CFIEnforcing). -StringRef TargetOptions::getCFIFuncName() const { - return CFIFuncName; -} diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp index ef2dab1..299380d 100644 --- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp @@ -224,26 +224,29 @@ unsigned TargetSchedModel::computeOperandLatency( return DefMI->isTransient() ? 
0 : TII->defaultDefLatency(SchedModel, DefMI); } +unsigned +TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const { + unsigned Latency = 0; + for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries; + DefIdx != DefEnd; ++DefIdx) { + // Lookup the definition's write latency in SubtargetInfo. + const MCWriteLatencyEntry *WLEntry = + STI->getWriteLatencyEntry(&SCDesc, DefIdx); + Latency = std::max(Latency, capLatency(WLEntry->Cycles)); + } + return Latency; +} + unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const { assert(hasInstrSchedModel() && "Only call this function with a SchedModel"); unsigned SCIdx = TII->get(Opcode).getSchedClass(); const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx); - unsigned Latency = 0; - if (SCDesc->isValid() && !SCDesc->isVariant()) { - for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; - DefIdx != DefEnd; ++DefIdx) { - // Lookup the definition's write latency in SubtargetInfo. - const MCWriteLatencyEntry *WLEntry = - STI->getWriteLatencyEntry(SCDesc, DefIdx); - Latency = std::max(Latency, capLatency(WLEntry->Cycles)); - } - return Latency; - } + if (SCDesc->isValid() && !SCDesc->isVariant()) + return computeInstrLatency(*SCDesc); - assert(Latency && "No MI sched latency"); - return 0; + llvm_unreachable("No MI sched latency"); } unsigned @@ -257,17 +260,8 @@ TargetSchedModel::computeInstrLatency(const MachineInstr *MI, if (hasInstrSchedModel()) { const MCSchedClassDesc *SCDesc = resolveSchedClass(MI); - if (SCDesc->isValid()) { - unsigned Latency = 0; - for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; - DefIdx != DefEnd; ++DefIdx) { - // Lookup the definition's write latency in SubtargetInfo. - const MCWriteLatencyEntry *WLEntry = - STI->getWriteLatencyEntry(SCDesc, DefIdx); - Latency = std::max(Latency, capLatency(WLEntry->Cycles)); - } - return Latency; - } + if (SCDesc->isValid()) + return computeInstrLatency(*SCDesc); } return TII->defaultDefLatency(SchedModel, MI); } diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index e218a83..6bceccc 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -45,6 +45,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -102,6 +103,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg, MachineBasicBlock::iterator OldPos); + bool isRevCopyChain(unsigned FromReg, unsigned ToReg, int Maxlen); + bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef); bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, @@ -186,7 +189,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, // Check if it's safe to move this instruction. bool SeenStore = true; // Be conservative. - if (!MI->isSafeToMove(TII, AA, SeenStore)) + if (!MI->isSafeToMove(AA, SeenStore)) return false; unsigned DefReg = 0; @@ -309,6 +312,45 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, return true; } +/// getSingleDef -- return the MachineInstr* if it is the single def of the Reg +/// in current BB. 
+static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB, + const MachineRegisterInfo *MRI) { + MachineInstr *Ret = nullptr; + for (MachineInstr &DefMI : MRI->def_instructions(Reg)) { + if (DefMI.getParent() != BB || DefMI.isDebugValue()) + continue; + if (!Ret) + Ret = &DefMI; + else if (Ret != &DefMI) + return nullptr; + } + return Ret; +} + +/// Check if there is a reversed copy chain from FromReg to ToReg: +/// %Tmp1 = copy %Tmp2; +/// %FromReg = copy %Tmp1; +/// %ToReg = add %FromReg ... +/// %Tmp2 = copy %ToReg; +/// MaxLen specifies the maximum length of the copy chain the function +/// can walk through. +bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg, + int Maxlen) { + unsigned TmpReg = FromReg; + for (int i = 0; i < Maxlen; i++) { + MachineInstr *Def = getSingleDef(TmpReg, MBB, MRI); + if (!Def || !Def->isCopy()) + return false; + + TmpReg = Def->getOperand(1).getReg(); + + if (TmpReg == ToReg) + return true; + } + return false; +} + /// noUseAfterLastDef - Return true if there are no intervening uses between the /// last instruction in the MBB that defines the specified register and the /// two-address instruction which is being processed. It also returns the last @@ -574,6 +616,27 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, if (!noUseAfterLastDef(regB, Dist, LastDefB)) return true; + // Look for a situation like this: + // %reg101 = MOV %reg100 + // %reg102 = ... + // %reg103 = ADD %reg102, %reg101 + // ... = %reg103 ... + // %reg100 = MOV %reg103 + // If there is a reversed copy chain from reg101 to reg103, commute the ADD + // to eliminate an otherwise unavoidable copy. + // FIXME: + // We can extend the logic further: If a pair of operands in an insn has + // been merged, the insn could be regarded as a virtual copy, and the virtual + // copy could also be used to construct a copy chain. + // To more generally minimize register copies, ideally the logic of the + // two-address instruction pass should be integrated with the register + // allocation pass, where the interference graph is available. + if (isRevCopyChain(regC, regA, 3)) + return true; + + if (isRevCopyChain(regB, regA, 3)) + return false; + // Since there are no intervening uses for both registers, then commute // if the def of regC is closer. Its live interval is shorter.
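For instance, with hypothetical distances LastDefB = 10 and LastDefC = 14, regC is defined nearer the two-address instruction, its live interval is the shorter one, and the comparison below reports that commuting is profitable:

    // Hypothetical values plugged into the comparison below.
    unsigned LastDefB = 10, LastDefC = 14;
    bool Commute = LastDefB && LastDefC && LastDefC > LastDefB; // true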
return LastDefB && LastDefC && LastDefC > LastDefB; @@ -798,7 +861,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, return false; bool SeenStore = true; - if (!MI->isSafeToMove(TII, AA, SeenStore)) + if (!MI->isSafeToMove(AA, SeenStore)) return false; if (TII->getInstrLatency(InstrItins, MI) > 1) @@ -985,7 +1048,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, return false; bool SeenStore = true; - if (!KillMI->isSafeToMove(TII, AA, SeenStore)) + if (!KillMI->isSafeToMove(AA, SeenStore)) return false; SmallSet<unsigned, 2> Uses; @@ -1515,9 +1578,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; const TargetMachine &TM = MF->getTarget(); MRI = &MF->getRegInfo(); - TII = TM.getSubtargetImpl()->getInstrInfo(); - TRI = TM.getSubtargetImpl()->getRegisterInfo(); - InstrItins = TM.getSubtargetImpl()->getInstrItineraryData(); + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + InstrItins = MF->getSubtarget().getInstrItineraryData(); LV = getAnalysisIfAvailable<LiveVariables>(); LIS = getAnalysisIfAvailable<LiveIntervals>(); AA = &getAnalysis<AliasAnalysis>(); diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp index 7824f92..d393e10 100644 --- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -88,7 +88,7 @@ bool UnreachableBlockElim::runOnFunction(Function &F) { DeadBlocks[i]->eraseFromParent(); } - return DeadBlocks.size(); + return !DeadBlocks.empty(); } @@ -204,5 +204,5 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { F.RenumberBlocks(); - return (DeadBlocks.size() || ModifiedPHI); + return (!DeadBlocks.empty() || ModifiedPHI); } diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp index 7d3b0ce..9fb1b5b 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -264,8 +264,7 @@ void VirtRegRewriter::addMBBLiveIns() { if ((SubRegLaneMask & S.LaneMask) == 0) continue; for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) { - if (!LiveIn[i]->isLiveIn(SubReg)) - LiveIn[i]->addLiveIn(SubReg); + LiveIn[i]->addLiveIn(SubReg); } } LiveIn.clear(); @@ -277,16 +276,20 @@ void VirtRegRewriter::addMBBLiveIns() { if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn)) continue; for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) - if (!LiveIn[i]->isLiveIn(PhysReg)) - LiveIn[i]->addLiveIn(PhysReg); + LiveIn[i]->addLiveIn(PhysReg); LiveIn.clear(); } } } + + // Sort and unique MBB LiveIns as we've not checked if SubReg/PhysReg were in + // each MBB's LiveIns set before calling addLiveIn on them. + for (MachineBasicBlock &MBB : *MF) + MBB.sortUniqueLiveIns(); } void VirtRegRewriter::rewrite() { - bool NoSubRegLiveness = !MRI->tracksSubRegLiveness(); + bool NoSubRegLiveness = !MRI->subRegLivenessEnabled(); SmallVector<unsigned, 8> SuperDeads; SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp new file mode 100644 index 0000000..7246e1c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -0,0 +1,2482 @@ +//===-- WinEHPrepare - Prepare exception handling for code generation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass lowers LLVM IR exception handling into something closer to what the +// backend wants for functions using a personality function from a runtime +// provided by MSVC. Functions with other personality functions are left alone +// and may be prepared by other passes. In particular, all supported MSVC +// personality functions require cleanup code to be outlined, and the C++ +// personality requires catch handler code to be outlined. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include <memory> + +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "winehprepare" + +namespace { + +// This map is used to model frame variable usage during outlining, to +// construct a structure type to hold the frame variables in a frame +// allocation block, and to remap the frame variable allocas (including +// spill locations as needed) to GEPs that get the variable from the +// frame allocation structure. +typedef MapVector<Value *, TinyPtrVector<AllocaInst *>> FrameVarInfoMap; + +// TinyPtrVector cannot hold nullptr, so we need our own sentinel that isn't +// quite null. +AllocaInst *getCatchObjectSentinel() { + return static_cast<AllocaInst *>(nullptr) + 1; +} + +typedef SmallSet<BasicBlock *, 4> VisitedBlockSet; + +class LandingPadActions; +class LandingPadMap; + +typedef DenseMap<const BasicBlock *, CatchHandler *> CatchHandlerMapTy; +typedef DenseMap<const BasicBlock *, CleanupHandler *> CleanupHandlerMapTy; + +class WinEHPrepare : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid. 
+ WinEHPrepare(const TargetMachine *TM = nullptr) + : FunctionPass(ID) { + if (TM) + TheTriple = Triple(TM->getTargetTriple()); + } + + bool runOnFunction(Function &Fn) override; + + bool doFinalization(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + const char *getPassName() const override { + return "Windows exception handling preparation"; + } + +private: + bool prepareExceptionHandlers(Function &F, + SmallVectorImpl<LandingPadInst *> &LPads); + void identifyEHBlocks(Function &F, SmallVectorImpl<LandingPadInst *> &LPads); + void promoteLandingPadValues(LandingPadInst *LPad); + void demoteValuesLiveAcrossHandlers(Function &F, + SmallVectorImpl<LandingPadInst *> &LPads); + void findSEHEHReturnPoints(Function &F, + SetVector<BasicBlock *> &EHReturnBlocks); + void findCXXEHReturnPoints(Function &F, + SetVector<BasicBlock *> &EHReturnBlocks); + void getPossibleReturnTargets(Function *ParentF, Function *HandlerF, + SetVector<BasicBlock*> &Targets); + void completeNestedLandingPad(Function *ParentFn, + LandingPadInst *OutlinedLPad, + const LandingPadInst *OriginalLPad, + FrameVarInfoMap &VarInfo); + Function *createHandlerFunc(Type *RetTy, const Twine &Name, Module *M, + Value *&ParentFP); + bool outlineHandler(ActionHandler *Action, Function *SrcFn, + LandingPadInst *LPad, BasicBlock *StartBB, + FrameVarInfoMap &VarInfo); + void addStubInvokeToHandlerIfNeeded(Function *Handler, Value *PersonalityFn); + + void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions); + CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB, + VisitedBlockSet &VisitedBlocks); + void findCleanupHandlers(LandingPadActions &Actions, BasicBlock *StartBB, + BasicBlock *EndBB); + + void processSEHCatchHandler(CatchHandler *Handler, BasicBlock *StartBB); + + Triple TheTriple; + + // All fields are reset by runOnFunction. + DominatorTree *DT = nullptr; + EHPersonality Personality = EHPersonality::Unknown; + CatchHandlerMapTy CatchHandlerMap; + CleanupHandlerMapTy CleanupHandlerMap; + DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps; + SmallPtrSet<BasicBlock *, 4> NormalBlocks; + SmallPtrSet<BasicBlock *, 4> EHBlocks; + SetVector<BasicBlock *> EHReturnBlocks; + + // This maps landing pad instructions found in outlined handlers to + // the landing pad instruction in the parent function from which they + // were cloned. The cloned/nested landing pad is used as the key + // because the landing pad may be cloned into multiple handlers. + // This map will be used to add the llvm.eh.actions call to the nested + // landing pads after all handlers have been outlined. + DenseMap<LandingPadInst *, const LandingPadInst *> NestedLPtoOriginalLP; + + // This maps blocks in the parent function which are destinations of + // catch handlers to cloned blocks in (other) outlined handlers. This + // handles the case where a nested landing pad has a catch handler that + // returns to a handler function rather than the parent function. + // The original block is used as the key here because there should only + // ever be one handler function from which the cloned block is not pruned. + // The original block will be pruned from the parent function after all + // handlers have been outlined. This map will be used to adjust the + // return instructions of handlers which return to the block that was + // outlined into a handler. This is done after all handlers have been + // outlined but before the outlined code is pruned from the parent function.
+ DenseMap<const BasicBlock *, BasicBlock *> LPadTargetBlocks; + + // Map from outlined handler to call to llvm.frameaddress(1). Only used for + // 32-bit EH. + DenseMap<Function *, Value *> HandlerToParentFP; + + AllocaInst *SEHExceptionCodeSlot = nullptr; +}; + +class WinEHFrameVariableMaterializer : public ValueMaterializer { +public: + WinEHFrameVariableMaterializer(Function *OutlinedFn, Value *ParentFP, + FrameVarInfoMap &FrameVarInfo); + ~WinEHFrameVariableMaterializer() override {} + + Value *materializeValueFor(Value *V) override; + + void escapeCatchObject(Value *V); + +private: + FrameVarInfoMap &FrameVarInfo; + IRBuilder<> Builder; +}; + +class LandingPadMap { +public: + LandingPadMap() : OriginLPad(nullptr) {} + void mapLandingPad(const LandingPadInst *LPad); + + bool isInitialized() { return OriginLPad != nullptr; } + + bool isOriginLandingPadBlock(const BasicBlock *BB) const; + bool isLandingPadSpecificInst(const Instruction *Inst) const; + + void remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, + Value *SelectorValue) const; + +private: + const LandingPadInst *OriginLPad; + // We will normally only see one of each of these instructions, but + // if more than one occurs for some reason we can handle that. + TinyPtrVector<const ExtractValueInst *> ExtractedEHPtrs; + TinyPtrVector<const ExtractValueInst *> ExtractedSelectors; +}; + +class WinEHCloningDirectorBase : public CloningDirector { +public: + WinEHCloningDirectorBase(Function *HandlerFn, Value *ParentFP, + FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) + : Materializer(HandlerFn, ParentFP, VarInfo), + SelectorIDType(Type::getInt32Ty(HandlerFn->getContext())), + Int8PtrType(Type::getInt8PtrTy(HandlerFn->getContext())), + LPadMap(LPadMap), ParentFP(ParentFP) {} + + CloningAction handleInstruction(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + + virtual CloningAction handleBeginCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) = 0; + virtual CloningAction handleEndCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) = 0; + virtual CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) = 0; + virtual CloningAction handleIndirectBr(ValueToValueMapTy &VMap, + const IndirectBrInst *IBr, + BasicBlock *NewBB) = 0; + virtual CloningAction handleInvoke(ValueToValueMapTy &VMap, + const InvokeInst *Invoke, + BasicBlock *NewBB) = 0; + virtual CloningAction handleResume(ValueToValueMapTy &VMap, + const ResumeInst *Resume, + BasicBlock *NewBB) = 0; + virtual CloningAction handleCompare(ValueToValueMapTy &VMap, + const CmpInst *Compare, + BasicBlock *NewBB) = 0; + virtual CloningAction handleLandingPad(ValueToValueMapTy &VMap, + const LandingPadInst *LPad, + BasicBlock *NewBB) = 0; + + ValueMaterializer *getValueMaterializer() override { return &Materializer; } + +protected: + WinEHFrameVariableMaterializer Materializer; + Type *SelectorIDType; + Type *Int8PtrType; + LandingPadMap &LPadMap; + + /// The value representing the parent frame pointer. 
+ Value *ParentFP; +}; + +class WinEHCatchDirector : public WinEHCloningDirectorBase { +public: + WinEHCatchDirector( + Function *CatchFn, Value *ParentFP, Value *Selector, + FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap, + DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPads, + DominatorTree *DT, SmallPtrSetImpl<BasicBlock *> &EHBlocks) + : WinEHCloningDirectorBase(CatchFn, ParentFP, VarInfo, LPadMap), + CurrentSelector(Selector->stripPointerCasts()), + ExceptionObjectVar(nullptr), NestedLPtoOriginalLP(NestedLPads), + DT(DT), EHBlocks(EHBlocks) {} + + CloningAction handleBeginCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleIndirectBr(ValueToValueMapTy &VMap, + const IndirectBrInst *IBr, + BasicBlock *NewBB) override; + CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke, + BasicBlock *NewBB) override; + CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, + BasicBlock *NewBB) override; + CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare, + BasicBlock *NewBB) override; + CloningAction handleLandingPad(ValueToValueMapTy &VMap, + const LandingPadInst *LPad, + BasicBlock *NewBB) override; + + Value *getExceptionVar() { return ExceptionObjectVar; } + TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; } + +private: + Value *CurrentSelector; + + Value *ExceptionObjectVar; + TinyPtrVector<BasicBlock *> ReturnTargets; + + // This will be a reference to the field of the same name in the WinEHPrepare + // object which instantiates this WinEHCatchDirector object. 
+ DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPtoOriginalLP; + DominatorTree *DT; + SmallPtrSetImpl<BasicBlock *> &EHBlocks; +}; + +class WinEHCleanupDirector : public WinEHCloningDirectorBase { +public: + WinEHCleanupDirector(Function *CleanupFn, Value *ParentFP, + FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) + : WinEHCloningDirectorBase(CleanupFn, ParentFP, VarInfo, + LPadMap) {} + + CloningAction handleBeginCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleIndirectBr(ValueToValueMapTy &VMap, + const IndirectBrInst *IBr, + BasicBlock *NewBB) override; + CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke, + BasicBlock *NewBB) override; + CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, + BasicBlock *NewBB) override; + CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare, + BasicBlock *NewBB) override; + CloningAction handleLandingPad(ValueToValueMapTy &VMap, + const LandingPadInst *LPad, + BasicBlock *NewBB) override; +}; + +class LandingPadActions { +public: + LandingPadActions() : HasCleanupHandlers(false) {} + + void insertCatchHandler(CatchHandler *Action) { Actions.push_back(Action); } + void insertCleanupHandler(CleanupHandler *Action) { + Actions.push_back(Action); + HasCleanupHandlers = true; + } + + bool includesCleanup() const { return HasCleanupHandlers; } + + SmallVectorImpl<ActionHandler *> &actions() { return Actions; } + SmallVectorImpl<ActionHandler *>::iterator begin() { return Actions.begin(); } + SmallVectorImpl<ActionHandler *>::iterator end() { return Actions.end(); } + +private: + // Note that this class does not own the ActionHandler objects in this vector. + // The ActionHandlers are owned by the CatchHandlerMap and CleanupHandlerMap + // in the WinEHPrepare class. + SmallVector<ActionHandler *, 4> Actions; + bool HasCleanupHandlers; +}; + +} // end anonymous namespace + +char WinEHPrepare::ID = 0; +INITIALIZE_TM_PASS(WinEHPrepare, "winehprepare", "Prepare Windows exceptions", + false, false) + +FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) { + return new WinEHPrepare(TM); +} + +bool WinEHPrepare::runOnFunction(Function &Fn) { + // No need to prepare outlined handlers. + if (Fn.hasFnAttribute("wineh-parent")) + return false; + + SmallVector<LandingPadInst *, 4> LPads; + SmallVector<ResumeInst *, 4> Resumes; + for (BasicBlock &BB : Fn) { + if (auto *LP = BB.getLandingPadInst()) + LPads.push_back(LP); + if (auto *Resume = dyn_cast<ResumeInst>(BB.getTerminator())) + Resumes.push_back(Resume); + } + + // No need to prepare functions that lack landing pads. + if (LPads.empty()) + return false; + + // Classify the personality to see what kind of preparation we need. + Personality = classifyEHPersonality(LPads.back()->getPersonalityFn()); + + // Do nothing if this is not an MSVC personality. + if (!isMSVCEHPersonality(Personality)) + return false; + + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + + // If there were any landing pads, prepareExceptionHandlers will make changes. 
+ prepareExceptionHandlers(Fn, LPads); + return true; +} + +bool WinEHPrepare::doFinalization(Module &M) { return false; } + +void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); +} + +static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler, + Constant *&Selector, BasicBlock *&NextBB); + +// Finds blocks reachable from the starting set Worklist. Does not follow unwind +// edges or blocks listed in StopPoints. +static void findReachableBlocks(SmallPtrSetImpl<BasicBlock *> &ReachableBBs, + SetVector<BasicBlock *> &Worklist, + const SetVector<BasicBlock *> *StopPoints) { + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + + // Don't cross blocks that we should stop at. + if (StopPoints && StopPoints->count(BB)) + continue; + + if (!ReachableBBs.insert(BB).second) + continue; // Already visited. + + // Don't follow unwind edges of invokes. + if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + Worklist.insert(II->getNormalDest()); + continue; + } + + // Otherwise, follow all successors. + Worklist.insert(succ_begin(BB), succ_end(BB)); + } +} + +// Attempt to find an instruction where a block can be split before +// a call to llvm.eh.begincatch and its operands. If the block +// begins with the begincatch call or one of its adjacent operands +// the block will not be split. +static Instruction *findBeginCatchSplitPoint(BasicBlock *BB, + IntrinsicInst *II) { + // If the begincatch call is already the first instruction in the block, + // don't split. + Instruction *FirstNonPHI = BB->getFirstNonPHI(); + if (II == FirstNonPHI) + return nullptr; + + // If either operand is in the same basic block as the instruction and + // isn't used by another instruction before the begincatch call, include it + // in the split block. + auto *Op0 = dyn_cast<Instruction>(II->getOperand(0)); + auto *Op1 = dyn_cast<Instruction>(II->getOperand(1)); + + Instruction *I = II->getPrevNode(); + Instruction *LastI = II; + + while (I == Op0 || I == Op1) { + // If the block begins with one of the operands and there are no other + // instructions between the operand and the begincatch call, don't split. + if (I == FirstNonPHI) + return nullptr; + + LastI = I; + I = I->getPrevNode(); + } + + // If there is at least one instruction in the block before the begincatch + // call and its operands, split the block at either the begincatch or + // its operand. + return LastI; +} + +/// Find all points where exceptional control rejoins normal control flow via +/// llvm.eh.endcatch. Add them to the normal bb reachability worklist. +void WinEHPrepare::findCXXEHReturnPoints( + Function &F, SetVector<BasicBlock *> &EHReturnBlocks) { + for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { + BasicBlock *BB = BBI; + for (Instruction &I : *BB) { + if (match(&I, m_Intrinsic<Intrinsic::eh_begincatch>())) { + Instruction *SplitPt = + findBeginCatchSplitPoint(BB, cast<IntrinsicInst>(&I)); + if (SplitPt) { + // Split the block before the llvm.eh.begincatch call to allow + // cleanup and catch code to be distinguished later. + // Do not update BBI because we still need to process the + // portion of the block that we are splitting off. + SplitBlock(BB, SplitPt, DT); + break; + } + } + if (match(&I, m_Intrinsic<Intrinsic::eh_endcatch>())) { + // Split the block after the call to llvm.eh.endcatch if there is + // anything other than an unconditional branch, or if the successor + // starts with a phi. 
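For example (hypothetical IR), given

    call void @llvm.eh.endcatch()
    br label %cont

where %cont begins with a phi, the block is split immediately after the endcatch call; the new block, which still branches to %cont, becomes the recorded EH return point, so the return point itself carries no phi (the assertion in identifyEHBlocks relies on this).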
+ auto *Br = dyn_cast<BranchInst>(I.getNextNode()); + if (!Br || !Br->isUnconditional() || + isa<PHINode>(Br->getSuccessor(0)->begin())) { + DEBUG(dbgs() << "splitting block " << BB->getName() + << " with llvm.eh.endcatch\n"); + BBI = SplitBlock(BB, I.getNextNode(), DT); + } + // The next BB is normal control flow. + EHReturnBlocks.insert(BB->getTerminator()->getSuccessor(0)); + break; + } + } + } +} + +static bool isCatchAllLandingPad(const BasicBlock *BB) { + const LandingPadInst *LP = BB->getLandingPadInst(); + if (!LP) + return false; + unsigned N = LP->getNumClauses(); + return (N > 0 && LP->isCatch(N - 1) && + isa<ConstantPointerNull>(LP->getClause(N - 1))); +} + +/// Find all points where exceptional control rejoins normal control flow via +/// selector dispatch. +void WinEHPrepare::findSEHEHReturnPoints( + Function &F, SetVector<BasicBlock *> &EHReturnBlocks) { + for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { + BasicBlock *BB = BBI; + // If the landingpad is a catch-all, treat the whole lpad as if it is + // reachable from normal control flow. + // FIXME: This is imprecise. We need a better way of identifying where a + // catch-all starts and cleanups stop. As far as LLVM is concerned, there + // is no difference. + if (isCatchAllLandingPad(BB)) { + EHReturnBlocks.insert(BB); + continue; + } + + BasicBlock *CatchHandler; + BasicBlock *NextBB; + Constant *Selector; + if (isSelectorDispatch(BB, CatchHandler, Selector, NextBB)) { + // Split the edge if there is a phi node. Returning from EH to a phi node + // is just as impossible as having a phi after an indirectbr. + if (isa<PHINode>(CatchHandler->begin())) { + DEBUG(dbgs() << "splitting EH return edge from " << BB->getName() + << " to " << CatchHandler->getName() << '\n'); + BBI = CatchHandler = SplitCriticalEdge( + BB, std::find(succ_begin(BB), succ_end(BB), CatchHandler)); + } + EHReturnBlocks.insert(CatchHandler); + } + } +} + +void WinEHPrepare::identifyEHBlocks(Function &F, + SmallVectorImpl<LandingPadInst *> &LPads) { + DEBUG(dbgs() << "Demoting values live across exception handlers in function " + << F.getName() << '\n'); + + // Build a set of all non-exceptional blocks and exceptional blocks. + // - Non-exceptional blocks are blocks reachable from the entry block while + // not following invoke unwind edges. + // - Exceptional blocks are blocks reachable from landingpads. Analysis does + // not follow llvm.eh.endcatch blocks, which mark a transition from + // exceptional to normal control. + + if (Personality == EHPersonality::MSVC_CXX) + findCXXEHReturnPoints(F, EHReturnBlocks); + else + findSEHEHReturnPoints(F, EHReturnBlocks); + + DEBUG({ + dbgs() << "identified the following blocks as EH return points:\n"; + for (BasicBlock *BB : EHReturnBlocks) + dbgs() << " " << BB->getName() << '\n'; + }); + +// Join points should not have phis at this point, unless they are +// landingpads, in which case we will demote their phis later. +#ifndef NDEBUG + for (BasicBlock *BB : EHReturnBlocks) + assert((BB->isLandingPad() || !isa<PHINode>(BB->begin())) && + "non-lpad EH return block has phi"); +#endif + + // Normal blocks are the blocks reachable from the entry block and all EH + // return points.
+ SetVector<BasicBlock *> Worklist; + Worklist = EHReturnBlocks; + Worklist.insert(&F.getEntryBlock()); + findReachableBlocks(NormalBlocks, Worklist, nullptr); + DEBUG({ + dbgs() << "marked the following blocks as normal:\n"; + for (BasicBlock *BB : NormalBlocks) + dbgs() << " " << BB->getName() << '\n'; + }); + + // Exceptional blocks are the blocks reachable from landingpads that don't + // cross EH return points. + Worklist.clear(); + for (auto *LPI : LPads) + Worklist.insert(LPI->getParent()); + findReachableBlocks(EHBlocks, Worklist, &EHReturnBlocks); + DEBUG({ + dbgs() << "marked the following blocks as exceptional:\n"; + for (BasicBlock *BB : EHBlocks) + dbgs() << " " << BB->getName() << '\n'; + }); + +} + +/// Ensure that all values live into and out of exception handlers are stored +/// in memory. +/// FIXME: This falls down when values are defined in one handler and live into +/// another handler. For example, a cleanup defines a value used only by a +/// catch handler. +void WinEHPrepare::demoteValuesLiveAcrossHandlers( + Function &F, SmallVectorImpl<LandingPadInst *> &LPads) { + DEBUG(dbgs() << "Demoting values live across exception handlers in function " + << F.getName() << '\n'); + + // identifyEHBlocks() should have been called before this function. + assert(!NormalBlocks.empty()); + + SetVector<Argument *> ArgsToDemote; + SetVector<Instruction *> InstrsToDemote; + for (BasicBlock &BB : F) { + bool IsNormalBB = NormalBlocks.count(&BB); + bool IsEHBB = EHBlocks.count(&BB); + if (!IsNormalBB && !IsEHBB) + continue; // Blocks that are neither normal nor EH are unreachable. + for (Instruction &I : BB) { + for (Value *Op : I.operands()) { + // Don't demote static allocas, constants, and labels. + if (isa<Constant>(Op) || isa<BasicBlock>(Op) || isa<InlineAsm>(Op)) + continue; + auto *AI = dyn_cast<AllocaInst>(Op); + if (AI && AI->isStaticAlloca()) + continue; + + if (auto *Arg = dyn_cast<Argument>(Op)) { + if (IsEHBB) { + DEBUG(dbgs() << "Demoting argument " << *Arg + << " used by EH instr: " << I << "\n"); + ArgsToDemote.insert(Arg); + } + continue; + } + + auto *OpI = cast<Instruction>(Op); + BasicBlock *OpBB = OpI->getParent(); + // If a value is produced and consumed in the same BB, we don't need to + // demote it. + if (OpBB == &BB) + continue; + bool IsOpNormalBB = NormalBlocks.count(OpBB); + bool IsOpEHBB = EHBlocks.count(OpBB); + if (IsNormalBB != IsOpNormalBB || IsEHBB != IsOpEHBB) { + DEBUG({ + dbgs() << "Demoting instruction live in-out from EH:\n"; + dbgs() << "Instr: " << *OpI << '\n'; + dbgs() << "User: " << I << '\n'; + }); + InstrsToDemote.insert(OpI); + } + } + } + } + + // Demote values live into and out of handlers. + // FIXME: This demotion is inefficient. We should insert spills at the point + // of definition, insert one reload in each handler that uses the value, and + // insert reloads in the BB used to rejoin normal control flow. + Instruction *AllocaInsertPt = F.getEntryBlock().getFirstInsertionPt(); + for (Instruction *I : InstrsToDemote) + DemoteRegToStack(*I, false, AllocaInsertPt); + + // Demote arguments separately, and only for uses in EH blocks. 
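In IR terms, the loop below performs roughly this rewrite for a hypothetical i32 argument %x with a use in a handler block (names follow the ".reg2mem"/".reload" suffixes used by the code; non-EH uses keep the original argument):

    entry:
      %x.reg2mem = alloca i32
      store i32 %x, i32* %x.reg2mem
      ...
    ehblock:
      %x.reload = load i32, i32* %x.reg2mem
      ; the EH use of %x is rewritten to use %x.reload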
+ for (Argument *Arg : ArgsToDemote) { + auto *Slot = new AllocaInst(Arg->getType(), nullptr, + Arg->getName() + ".reg2mem", AllocaInsertPt); + SmallVector<User *, 4> Users(Arg->user_begin(), Arg->user_end()); + for (User *U : Users) { + auto *I = dyn_cast<Instruction>(U); + if (I && EHBlocks.count(I->getParent())) { + auto *Reload = new LoadInst(Slot, Arg->getName() + ".reload", false, I); + U->replaceUsesOfWith(Arg, Reload); + } + } + new StoreInst(Arg, Slot, AllocaInsertPt); + } + + // Demote landingpad phis, as the landingpad will be removed from the machine + // CFG. + for (LandingPadInst *LPI : LPads) { + BasicBlock *BB = LPI->getParent(); + while (auto *Phi = dyn_cast<PHINode>(BB->begin())) + DemotePHIToStack(Phi, AllocaInsertPt); + } + + DEBUG(dbgs() << "Demoted " << InstrsToDemote.size() << " instructions and " + << ArgsToDemote.size() << " arguments for WinEHPrepare\n\n"); +} + +bool WinEHPrepare::prepareExceptionHandlers( + Function &F, SmallVectorImpl<LandingPadInst *> &LPads) { + // Don't run on functions that are already prepared. + for (LandingPadInst *LPad : LPads) { + BasicBlock *LPadBB = LPad->getParent(); + for (Instruction &Inst : *LPadBB) + if (match(&Inst, m_Intrinsic<Intrinsic::eh_actions>())) + return false; + } + + identifyEHBlocks(F, LPads); + demoteValuesLiveAcrossHandlers(F, LPads); + + // These containers are used to re-map frame variables that are used in + // outlined catch and cleanup handlers. They will be populated as the + // handlers are outlined. + FrameVarInfoMap FrameVarInfo; + + bool HandlersOutlined = false; + + Module *M = F.getParent(); + LLVMContext &Context = M->getContext(); + + // Create a new function to receive the handler contents. + PointerType *Int8PtrType = Type::getInt8PtrTy(Context); + Type *Int32Type = Type::getInt32Ty(Context); + Function *ActionIntrin = Intrinsic::getDeclaration(M, Intrinsic::eh_actions); + + if (isAsynchronousEHPersonality(Personality)) { + // FIXME: Switch the ehptr type to i32 and then switch this. + SEHExceptionCodeSlot = + new AllocaInst(Int8PtrType, nullptr, "seh_exception_code", + F.getEntryBlock().getFirstInsertionPt()); + } + + // In order to handle the case where one outlined catch handler returns + // to a block within another outlined catch handler that would otherwise + // be unreachable, we need to outline the nested landing pad before we + // outline the landing pad which encloses it. + if (!isAsynchronousEHPersonality(Personality)) + std::sort(LPads.begin(), LPads.end(), + [this](LandingPadInst *const &L, LandingPadInst *const &R) { + return DT->properlyDominates(R->getParent(), L->getParent()); + }); + + // This container stores the llvm.eh.recover and IndirectBr instructions + // that make up the body of each landing pad after it has been outlined. + // We need to defer the population of the target list for the indirectbr + // until all landing pads have been outlined so that we can handle the + // case of blocks in the target that are reached only from nested + // landing pads. + SmallVector<std::pair<CallInst*, IndirectBrInst *>, 4> LPadImpls; + + for (LandingPadInst *LPad : LPads) { + // Look for evidence that this landingpad has already been processed. + bool LPadHasActionList = false; + BasicBlock *LPadBB = LPad->getParent(); + for (Instruction &Inst : *LPadBB) { + if (match(&Inst, m_Intrinsic<Intrinsic::eh_actions>())) { + LPadHasActionList = true; + break; + } + } + + // If we've already outlined the handlers for this landingpad, + // there's nothing more to do here. 
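
The rewrite performed here is the classic reg-to-memory demotion that DemoteRegToStack implements: one stack slot per value, one store at the definition (for arguments, in the entry block), and a reload in front of each use inside a handler. A string-based sketch of just the bookkeeping, with toy names standing in for IR:

    #include <string>
    #include <vector>

    // Toy model of reg-to-stack demotion: the three artifacts it produces.
    struct Demotion {
      std::string Slot;                 // the alloca standing in for the value
      std::string SpillAfterDef;        // single store at the definition point
      std::vector<std::string> Reloads; // one load per use in a handler block
    };

    Demotion demote(const std::string &V, const std::vector<std::string> &EHUses) {
      Demotion D;
      D.Slot = V + ".slot";
      D.SpillAfterDef = "store " + V + " -> " + D.Slot;
      for (const std::string &UseBB : EHUses)
        D.Reloads.push_back(V + ".reload = load " + D.Slot + " ; in " + UseBB);
      return D;
    }

As the FIXME above concedes, this placement is deliberately naive (spill once, reload everywhere); a sharper scheme would reload only at rejoin points. Back in the patch, prepareExceptionHandlers next skips landing pads it has already processed.
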
+ if (LPadHasActionList) + continue; + + // If either of the values in the aggregate returned by the landing pad is + // extracted and stored to memory, promote the stored value to a register. + promoteLandingPadValues(LPad); + + LandingPadActions Actions; + mapLandingPadBlocks(LPad, Actions); + + HandlersOutlined |= !Actions.actions().empty(); + for (ActionHandler *Action : Actions) { + if (Action->hasBeenProcessed()) + continue; + BasicBlock *StartBB = Action->getStartBlock(); + + // SEH doesn't do any outlining for catches. Instead, pass the handler + // basic block addr to llvm.eh.actions and list the block as a return + // target. + if (isAsynchronousEHPersonality(Personality)) { + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + processSEHCatchHandler(CatchAction, StartBB); + continue; + } + } + + outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo); + } + + // Split the block after the landingpad instruction so that it is just a + // call to llvm.eh.actions followed by indirectbr. + assert(!isa<PHINode>(LPadBB->begin()) && "lpad phi not removed"); + SplitBlock(LPadBB, LPad->getNextNode(), DT); + // Erase the branch inserted by the split so we can insert indirectbr. + LPadBB->getTerminator()->eraseFromParent(); + + // Replace all extracted values with undef and ultimately replace the + // landingpad with undef. + SmallVector<Instruction *, 4> SEHCodeUses; + SmallVector<Instruction *, 4> EHUndefs; + for (User *U : LPad->users()) { + auto *E = dyn_cast<ExtractValueInst>(U); + if (!E) + continue; + assert(E->getNumIndices() == 1 && + "Unexpected operation: extracting both landing pad values"); + unsigned Idx = *E->idx_begin(); + assert((Idx == 0 || Idx == 1) && "unexpected index"); + if (Idx == 0 && isAsynchronousEHPersonality(Personality)) + SEHCodeUses.push_back(E); + else + EHUndefs.push_back(E); + } + for (Instruction *E : EHUndefs) { + E->replaceAllUsesWith(UndefValue::get(E->getType())); + E->eraseFromParent(); + } + LPad->replaceAllUsesWith(UndefValue::get(LPad->getType())); + + // Rewrite uses of the exception pointer to loads of an alloca. + for (Instruction *E : SEHCodeUses) { + SmallVector<Use *, 4> Uses; + for (Use &U : E->uses()) + Uses.push_back(&U); + for (Use *U : Uses) { + auto *I = cast<Instruction>(U->getUser()); + if (isa<ResumeInst>(I)) + continue; + LoadInst *LI; + if (auto *Phi = dyn_cast<PHINode>(I)) + LI = new LoadInst(SEHExceptionCodeSlot, "sehcode", false, + Phi->getIncomingBlock(*U)); + else + LI = new LoadInst(SEHExceptionCodeSlot, "sehcode", false, I); + U->set(LI); + } + E->replaceAllUsesWith(UndefValue::get(E->getType())); + E->eraseFromParent(); + } + + // Add a call to describe the actions for this landing pad. + std::vector<Value *> ActionArgs; + for (ActionHandler *Action : Actions) { + // Action codes from docs are: 0 cleanup, 1 catch. + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + ActionArgs.push_back(ConstantInt::get(Int32Type, 1)); + ActionArgs.push_back(CatchAction->getSelector()); + // Find the frame escape index of the exception object alloca in the + // parent. 
+ int FrameEscapeIdx = -1;
+ Value *EHObj = const_cast<Value *>(CatchAction->getExceptionVar());
+ if (EHObj && !isa<ConstantPointerNull>(EHObj)) {
+ auto I = FrameVarInfo.find(EHObj);
+ assert(I != FrameVarInfo.end() &&
+ "failed to map llvm.eh.begincatch var");
+ FrameEscapeIdx = std::distance(FrameVarInfo.begin(), I);
+ }
+ ActionArgs.push_back(ConstantInt::get(Int32Type, FrameEscapeIdx));
+ } else {
+ ActionArgs.push_back(ConstantInt::get(Int32Type, 0));
+ }
+ ActionArgs.push_back(Action->getHandlerBlockOrFunc());
+ }
+ CallInst *Recover =
+ CallInst::Create(ActionIntrin, ActionArgs, "recover", LPadBB);
+
+ SetVector<BasicBlock *> ReturnTargets;
+ for (ActionHandler *Action : Actions) {
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ const auto &CatchTargets = CatchAction->getReturnTargets();
+ ReturnTargets.insert(CatchTargets.begin(), CatchTargets.end());
+ }
+ }
+ IndirectBrInst *Branch =
+ IndirectBrInst::Create(Recover, ReturnTargets.size(), LPadBB);
+ for (BasicBlock *Target : ReturnTargets)
+ Branch->addDestination(Target);
+
+ if (!isAsynchronousEHPersonality(Personality)) {
+ // C++ EH must repopulate the targets later to handle the case of
+ // targets that are reached indirectly through nested landing pads.
+ LPadImpls.push_back(std::make_pair(Recover, Branch));
+ }
+
+ } // End for each landingpad
+
+ // If nothing got outlined, there is no more processing to be done.
+ if (!HandlersOutlined)
+ return false;
+
+ // Replace any nested landing pad stubs with the correct action handler.
+ // This must be done before we remove unreachable blocks because it
+ // cleans up references to outlined blocks that will be deleted.
+ for (auto &LPadPair : NestedLPtoOriginalLP)
+ completeNestedLandingPad(&F, LPadPair.first, LPadPair.second, FrameVarInfo);
+ NestedLPtoOriginalLP.clear();
+
+ // Update the indirectbr instructions' target lists if necessary.
+ SetVector<BasicBlock*> CheckedTargets;
+ SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
+ for (auto &LPadImplPair : LPadImpls) {
+ IntrinsicInst *Recover = cast<IntrinsicInst>(LPadImplPair.first);
+ IndirectBrInst *Branch = LPadImplPair.second;
+
+ // Get a list of the handlers called by this landing pad's llvm.eh.actions.
+ parseEHActions(Recover, ActionList);
+
+ // Add an indirect branch listing possible successors of the catch handlers.
+ SetVector<BasicBlock *> ReturnTargets;
+ for (const auto &Action : ActionList) {
+ if (auto *CA = dyn_cast<CatchHandler>(Action.get())) {
+ Function *Handler = cast<Function>(CA->getHandlerBlockOrFunc());
+ getPossibleReturnTargets(&F, Handler, ReturnTargets);
+ }
+ }
+ ActionList.clear();
+ // Clear any targets we already knew about.
+ for (unsigned int I = 0, E = Branch->getNumDestinations(); I < E; ++I) {
+ BasicBlock *KnownTarget = Branch->getDestination(I);
+ if (ReturnTargets.count(KnownTarget))
+ ReturnTargets.remove(KnownTarget);
+ }
+ for (BasicBlock *Target : ReturnTargets) {
+ Branch->addDestination(Target);
+ // The target may be a block that we expected to get pruned.
+ // If it is, it may contain a call to llvm.eh.endcatch.
+ if (CheckedTargets.insert(Target)) {
+ // Earlier preparations guarantee that all calls to llvm.eh.endcatch
+ // will be followed by an unconditional branch.
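
The ActionArgs list built above is a flat, integer-tagged encoding: each cleanup contributes the pair (0, handler) and each catch the quadruple (1, selector, frame-escape index, handler). A std-only sketch of that flattening, with strings standing in for the real LLVM values:

    #include <string>
    #include <variant>
    #include <vector>

    // Toy stand-ins for the operands of llvm.eh.actions.
    struct CatchAction {
      std::string Selector;  // RTTI descriptor in the real pass
      int FrameEscapeIdx;    // index of the exception object slot, or -1
      std::string Handler;   // outlined .catch function
    };
    struct CleanupAction {
      std::string Handler;   // outlined .cleanup function
    };
    using Action = std::variant<CatchAction, CleanupAction>;

    // Flatten to the tagged argument list: 1,sel,idx,fn for a catch and
    // 0,fn for a cleanup -- mirroring the encoding built in the pass.
    std::vector<std::string> encodeActions(const std::vector<Action> &Actions) {
      std::vector<std::string> Args;
      for (const Action &A : Actions) {
        if (const auto *C = std::get_if<CatchAction>(&A)) {
          Args.push_back("i32 1");
          Args.push_back(C->Selector);
          Args.push_back("i32 " + std::to_string(C->FrameEscapeIdx));
          Args.push_back(C->Handler);
        } else {
          Args.push_back("i32 0");
          Args.push_back(std::get<CleanupAction>(A).Handler);
        }
      }
      return Args;
    }

The unconditional-branch guarantee the comment above mentions is what the next hunk relies on when it strips stray llvm.eh.endcatch calls from resurrected targets.
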
+ auto *Br = dyn_cast<BranchInst>(Target->getTerminator()); + if (Br && Br->isUnconditional() && + Br != Target->getFirstNonPHIOrDbgOrLifetime()) { + Instruction *Prev = Br->getPrevNode(); + if (match(cast<Value>(Prev), m_Intrinsic<Intrinsic::eh_endcatch>())) + Prev->eraseFromParent(); + } + } + } + } + LPadImpls.clear(); + + F.addFnAttr("wineh-parent", F.getName()); + + // Delete any blocks that were only used by handlers that were outlined above. + removeUnreachableBlocks(F); + + BasicBlock *Entry = &F.getEntryBlock(); + IRBuilder<> Builder(F.getParent()->getContext()); + Builder.SetInsertPoint(Entry->getFirstInsertionPt()); + + Function *FrameEscapeFn = + Intrinsic::getDeclaration(M, Intrinsic::frameescape); + Function *RecoverFrameFn = + Intrinsic::getDeclaration(M, Intrinsic::framerecover); + SmallVector<Value *, 8> AllocasToEscape; + + // Scan the entry block for an existing call to llvm.frameescape. We need to + // keep escaping those objects. + for (Instruction &I : F.front()) { + auto *II = dyn_cast<IntrinsicInst>(&I); + if (II && II->getIntrinsicID() == Intrinsic::frameescape) { + auto Args = II->arg_operands(); + AllocasToEscape.append(Args.begin(), Args.end()); + II->eraseFromParent(); + break; + } + } + + // Finally, replace all of the temporary allocas for frame variables used in + // the outlined handlers with calls to llvm.framerecover. + for (auto &VarInfoEntry : FrameVarInfo) { + Value *ParentVal = VarInfoEntry.first; + TinyPtrVector<AllocaInst *> &Allocas = VarInfoEntry.second; + AllocaInst *ParentAlloca = cast<AllocaInst>(ParentVal); + + // FIXME: We should try to sink unescaped allocas from the parent frame into + // the child frame. If the alloca is escaped, we have to use the lifetime + // markers to ensure that the alloca is only live within the child frame. + + // Add this alloca to the list of things to escape. + AllocasToEscape.push_back(ParentAlloca); + + // Next replace all outlined allocas that are mapped to it. + for (AllocaInst *TempAlloca : Allocas) { + if (TempAlloca == getCatchObjectSentinel()) + continue; // Skip catch parameter sentinels. + Function *HandlerFn = TempAlloca->getParent()->getParent(); + llvm::Value *FP = HandlerToParentFP[HandlerFn]; + assert(FP); + + // FIXME: Sink this framerecover into the blocks where it is used. + Builder.SetInsertPoint(TempAlloca); + Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc()); + Value *RecoverArgs[] = { + Builder.CreateBitCast(&F, Int8PtrType, ""), FP, + llvm::ConstantInt::get(Int32Type, AllocasToEscape.size() - 1)}; + Instruction *RecoveredAlloca = + Builder.CreateCall(RecoverFrameFn, RecoverArgs); + + // Add a pointer bitcast if the alloca wasn't an i8. + if (RecoveredAlloca->getType() != TempAlloca->getType()) { + RecoveredAlloca->setName(Twine(TempAlloca->getName()) + ".i8"); + RecoveredAlloca = cast<Instruction>( + Builder.CreateBitCast(RecoveredAlloca, TempAlloca->getType())); + } + TempAlloca->replaceAllUsesWith(RecoveredAlloca); + TempAlloca->removeFromParent(); + RecoveredAlloca->takeName(TempAlloca); + delete TempAlloca; + } + } // End for each FrameVarInfo entry. + + // Insert 'call void (...)* @llvm.frameescape(...)' at the end of the entry + // block. 
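
llvm.frameescape and llvm.framerecover form the contract the loop above dependsds on -- rather: the contract is that the parent publishes an ordered list of its allocas, and a handler, given the parent function, its frame pointer, and an index, gets slot N back. A toy model of that indexing contract (plain C++; in the real lowering there is no run-time table, the recover call becomes a constant offset from the parent frame):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Toy "parent frame": an ordered list of escaped slot addresses.
    struct ParentFrame {
      std::vector<void *> EscapedSlots; // filled by the llvm.frameescape analog
    };

    // llvm.frameescape analog: the parent registers its allocas, in order.
    std::size_t escapeSlot(ParentFrame &F, void *Alloca) {
      F.EscapedSlots.push_back(Alloca);
      return F.EscapedSlots.size() - 1; // this index is what the handler uses
    }

    // llvm.framerecover analog: a handler recovers slot N of the parent frame.
    void *recoverSlot(const ParentFrame &F, std::size_t Idx) {
      assert(Idx < F.EscapedSlots.size() && "bad frame-escape index");
      return F.EscapedSlots[Idx];
    }

This is why the code pushes ParentAlloca onto AllocasToEscape before computing AllocasToEscape.size() - 1: the index baked into each framerecover call must match the alloca's final position in the escape list, which the frameescape call emitted just below fixes in place.
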
+ Builder.SetInsertPoint(&F.getEntryBlock().back()); + Builder.CreateCall(FrameEscapeFn, AllocasToEscape); + + if (SEHExceptionCodeSlot) { + if (SEHExceptionCodeSlot->hasNUses(0)) + SEHExceptionCodeSlot->eraseFromParent(); + else if (isAllocaPromotable(SEHExceptionCodeSlot)) + PromoteMemToReg(SEHExceptionCodeSlot, *DT); + } + + // Clean up the handler action maps we created for this function + DeleteContainerSeconds(CatchHandlerMap); + CatchHandlerMap.clear(); + DeleteContainerSeconds(CleanupHandlerMap); + CleanupHandlerMap.clear(); + HandlerToParentFP.clear(); + DT = nullptr; + SEHExceptionCodeSlot = nullptr; + EHBlocks.clear(); + NormalBlocks.clear(); + EHReturnBlocks.clear(); + + return HandlersOutlined; +} + +void WinEHPrepare::promoteLandingPadValues(LandingPadInst *LPad) { + // If the return values of the landing pad instruction are extracted and + // stored to memory, we want to promote the store locations to reg values. + SmallVector<AllocaInst *, 2> EHAllocas; + + // The landingpad instruction returns an aggregate value. Typically, its + // value will be passed to a pair of extract value instructions and the + // results of those extracts are often passed to store instructions. + // In unoptimized code the stored value will often be loaded and then stored + // again. + for (auto *U : LPad->users()) { + ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U); + if (!Extract) + continue; + + for (auto *EU : Extract->users()) { + if (auto *Store = dyn_cast<StoreInst>(EU)) { + auto *AV = cast<AllocaInst>(Store->getPointerOperand()); + EHAllocas.push_back(AV); + } + } + } + + // We can't do this without a dominator tree. + assert(DT); + + if (!EHAllocas.empty()) { + PromoteMemToReg(EHAllocas, *DT); + EHAllocas.clear(); + } + + // After promotion, some extracts may be trivially dead. Remove them. + SmallVector<Value *, 4> Users(LPad->user_begin(), LPad->user_end()); + for (auto *U : Users) + RecursivelyDeleteTriviallyDeadInstructions(U); +} + +void WinEHPrepare::getPossibleReturnTargets(Function *ParentF, + Function *HandlerF, + SetVector<BasicBlock*> &Targets) { + for (BasicBlock &BB : *HandlerF) { + // If the handler contains landing pads, check for any + // handlers that may return directly to a block in the + // parent function. + if (auto *LPI = BB.getLandingPadInst()) { + IntrinsicInst *Recover = cast<IntrinsicInst>(LPI->getNextNode()); + SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; + parseEHActions(Recover, ActionList); + for (const auto &Action : ActionList) { + if (auto *CH = dyn_cast<CatchHandler>(Action.get())) { + Function *NestedF = cast<Function>(CH->getHandlerBlockOrFunc()); + getPossibleReturnTargets(ParentF, NestedF, Targets); + } + } + } + + auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()); + if (!Ret) + continue; + + // Handler functions must always return a block address. + BlockAddress *BA = cast<BlockAddress>(Ret->getReturnValue()); + + // If this is the handler for a nested landing pad, the + // return address may have been remapped to a block in the + // parent handler. We're not interested in those. + if (BA->getFunction() != ParentF) + continue; + + Targets.insert(BA->getBasicBlock()); + } +} + +void WinEHPrepare::completeNestedLandingPad(Function *ParentFn, + LandingPadInst *OutlinedLPad, + const LandingPadInst *OriginalLPad, + FrameVarInfoMap &FrameVarInfo) { + // Get the nested block and erase the unreachable instruction that was + // temporarily inserted as its terminator. 
+ LLVMContext &Context = ParentFn->getContext();
+ BasicBlock *OutlinedBB = OutlinedLPad->getParent();
+ // If the nested landing pad was outlined before the landing pad that enclosed
+ // it, it will already be in outlined form. In that case, we just need to see
+ // if the returns and the enclosing branch instruction need to be updated.
+ IndirectBrInst *Branch =
+ dyn_cast<IndirectBrInst>(OutlinedBB->getTerminator());
+ if (!Branch) {
+ // If the landing pad wasn't in outlined form, it should be a stub with
+ // an unreachable terminator.
+ assert(isa<UnreachableInst>(OutlinedBB->getTerminator()));
+ OutlinedBB->getTerminator()->eraseFromParent();
+ // That should leave OutlinedLPad as the last instruction in its block.
+ assert(&OutlinedBB->back() == OutlinedLPad);
+ }
+
+ // The original landing pad will have already had its action intrinsic
+ // built by the outlining loop. We need to clone that into the outlined
+ // location. It may also be necessary to add references to the exception
+ // variables to the outlined handler in which this landing pad is nested
+ // and remap return instructions in the nested handlers that should return
+ // to an address in the outlined handler.
+ Function *OutlinedHandlerFn = OutlinedBB->getParent();
+ BasicBlock::const_iterator II = OriginalLPad;
+ ++II;
+ // The instruction after the landing pad should now be a call to eh.actions.
+ const Instruction *Recover = II;
+ assert(match(Recover, m_Intrinsic<Intrinsic::eh_actions>()));
+ const IntrinsicInst *EHActions = cast<IntrinsicInst>(Recover);
+
+ // Remap the return target in the nested handler.
+ SmallVector<BlockAddress *, 4> ActionTargets;
+ SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
+ parseEHActions(EHActions, ActionList);
+ for (const auto &Action : ActionList) {
+ auto *Catch = dyn_cast<CatchHandler>(Action.get());
+ if (!Catch)
+ continue;
+ // The dyn_cast to function here selects C++ catch handlers and skips
+ // SEH catch handlers.
+ auto *Handler = dyn_cast<Function>(Catch->getHandlerBlockOrFunc());
+ if (!Handler)
+ continue;
+ // Visit all the return instructions, looking for places that return
+ // to a location within OutlinedHandlerFn.
+ for (BasicBlock &NestedHandlerBB : *Handler) {
+ auto *Ret = dyn_cast<ReturnInst>(NestedHandlerBB.getTerminator());
+ if (!Ret)
+ continue;
+
+ // Handler functions must always return a block address.
+ BlockAddress *BA = cast<BlockAddress>(Ret->getReturnValue());
+ // The original target will have been in the main parent function,
+ // but if it is the address of a block that has been outlined, it
+ // should be a block that was outlined into OutlinedHandlerFn.
+ assert(BA->getFunction() == ParentFn);
+
+ // Ignore targets that aren't part of an outlined handler function.
+ if (!LPadTargetBlocks.count(BA->getBasicBlock()))
+ continue;
+
+ // If the return value is the address of a block that we
+ // previously outlined into the parent handler function, replace
+ // the return instruction and add the mapped target to the list
+ // of possible return addresses.
+ BasicBlock *MappedBB = LPadTargetBlocks[BA->getBasicBlock()];
+ assert(MappedBB->getParent() == OutlinedHandlerFn);
+ BlockAddress *NewBA = BlockAddress::get(OutlinedHandlerFn, MappedBB);
+ Ret->eraseFromParent();
+ ReturnInst::Create(Context, NewBA, &NestedHandlerBB);
+ ActionTargets.push_back(NewBA);
+ }
+ }
+ ActionList.clear();
+
+ if (Branch) {
+ // If the landing pad was already in outlined form, just update its targets.
+ for (unsigned int I = Branch->getNumDestinations(); I > 0; --I)
+ Branch->removeDestination(I - 1);
+ // Add the previously collected action targets.
+ for (auto *Target : ActionTargets)
+ Branch->addDestination(Target->getBasicBlock());
+ } else {
+ // If the landing pad was previously stubbed out, fill in its outlined form.
+ IntrinsicInst *NewEHActions = cast<IntrinsicInst>(EHActions->clone());
+ OutlinedBB->getInstList().push_back(NewEHActions);
+
+ // Insert an indirect branch into the outlined landing pad BB.
+ IndirectBrInst *IBr = IndirectBrInst::Create(NewEHActions, 0, OutlinedBB);
+ // Add the previously collected action targets.
+ for (auto *Target : ActionTargets)
+ IBr->addDestination(Target->getBasicBlock());
+ }
+}
+
+// This function examines a block to determine whether the block ends with a
+// conditional branch to a catch handler based on a selector comparison.
+// It is used by both WinEHPrepare::findSelectorComparison() and
+// WinEHCleanupDirector::handleTypeIdFor().
+static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler,
+ Constant *&Selector, BasicBlock *&NextBB) {
+ ICmpInst::Predicate Pred;
+ BasicBlock *TBB, *FBB;
+ Value *LHS, *RHS;
+
+ if (!match(BB->getTerminator(),
+ m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TBB, FBB)))
+ return false;
+
+ if (!match(LHS,
+ m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))) &&
+ !match(RHS, m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))))
+ return false;
+
+ if (Pred == CmpInst::ICMP_EQ) {
+ CatchHandler = TBB;
+ NextBB = FBB;
+ return true;
+ }
+
+ if (Pred == CmpInst::ICMP_NE) {
+ CatchHandler = FBB;
+ NextBB = TBB;
+ return true;
+ }
+
+ return false;
+}
+
+static bool isCatchBlock(BasicBlock *BB) {
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE; ++II) {
+ if (match(cast<Value>(II), m_Intrinsic<Intrinsic::eh_begincatch>()))
+ return true;
+ }
+ return false;
+}
+
+static BasicBlock *createStubLandingPad(Function *Handler,
+ Value *PersonalityFn) {
+ // FIXME: Finish this!
+ LLVMContext &Context = Handler->getContext();
+ BasicBlock *StubBB = BasicBlock::Create(Context, "stub");
+ Handler->getBasicBlockList().push_back(StubBB);
+ IRBuilder<> Builder(StubBB);
+ LandingPadInst *LPad = Builder.CreateLandingPad(
+ llvm::StructType::get(Type::getInt8PtrTy(Context),
+ Type::getInt32Ty(Context), nullptr),
+ PersonalityFn, 0);
+ // Insert a call to llvm.eh.actions so that we don't try to outline this lpad.
+ Function *ActionIntrin =
+ Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::eh_actions);
+ Builder.CreateCall(ActionIntrin, {}, "recover");
+ LPad->setCleanup(true);
+ Builder.CreateUnreachable();
+ return StubBB;
+}
+
+// Cycles through the blocks in an outlined handler function looking for an
+// invoke instruction and inserts an invoke of llvm.donothing with an empty
+// landing pad if none is found. The code that generates the .xdata tables for
+// the handler needs at least one landing pad to identify the parent function's
+// personality.
+void WinEHPrepare::addStubInvokeToHandlerIfNeeded(Function *Handler,
+ Value *PersonalityFn) {
+ ReturnInst *Ret = nullptr;
+ UnreachableInst *Unreached = nullptr;
+ for (BasicBlock &BB : *Handler) {
+ TerminatorInst *Terminator = BB.getTerminator();
+ // If we find an invoke, there is nothing to be done.
+ auto *II = dyn_cast<InvokeInst>(Terminator);
+ if (II)
+ return;
+ // If we've already recorded a return instruction, keep looking for invokes.
+ if (!Ret) + Ret = dyn_cast<ReturnInst>(Terminator); + // If we haven't recorded an unreachable instruction, try this terminator. + if (!Unreached) + Unreached = dyn_cast<UnreachableInst>(Terminator); + } + + // If we got this far, the handler contains no invokes. We should have seen + // at least one return or unreachable instruction. We'll insert an invoke of + // llvm.donothing ahead of that instruction. + assert(Ret || Unreached); + TerminatorInst *Term; + if (Ret) + Term = Ret; + else + Term = Unreached; + BasicBlock *OldRetBB = Term->getParent(); + BasicBlock *NewRetBB = SplitBlock(OldRetBB, Term, DT); + // SplitBlock adds an unconditional branch instruction at the end of the + // parent block. We want to replace that with an invoke call, so we can + // erase it now. + OldRetBB->getTerminator()->eraseFromParent(); + BasicBlock *StubLandingPad = createStubLandingPad(Handler, PersonalityFn); + Function *F = + Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::donothing); + InvokeInst::Create(F, NewRetBB, StubLandingPad, None, "", OldRetBB); +} + +// FIXME: Consider sinking this into lib/Target/X86 somehow. TargetLowering +// usually doesn't build LLVM IR, so that's probably the wrong place. +Function *WinEHPrepare::createHandlerFunc(Type *RetTy, const Twine &Name, + Module *M, Value *&ParentFP) { + // x64 uses a two-argument prototype where the parent FP is the second + // argument. x86 uses no arguments, just the incoming EBP value. + LLVMContext &Context = M->getContext(); + FunctionType *FnType; + if (TheTriple.getArch() == Triple::x86_64) { + Type *Int8PtrType = Type::getInt8PtrTy(Context); + Type *ArgTys[2] = {Int8PtrType, Int8PtrType}; + FnType = FunctionType::get(RetTy, ArgTys, false); + } else { + FnType = FunctionType::get(RetTy, None, false); + } + + Function *Handler = + Function::Create(FnType, GlobalVariable::InternalLinkage, Name, M); + BasicBlock *Entry = BasicBlock::Create(Context, "entry"); + Handler->getBasicBlockList().push_front(Entry); + if (TheTriple.getArch() == Triple::x86_64) { + ParentFP = &(Handler->getArgumentList().back()); + } else { + assert(M); + Function *FrameAddressFn = + Intrinsic::getDeclaration(M, Intrinsic::frameaddress); + Value *Args[1] = {ConstantInt::get(Type::getInt32Ty(Context), 1)}; + ParentFP = CallInst::Create(FrameAddressFn, Args, "parent_fp", + &Handler->getEntryBlock()); + } + return Handler; +} + +bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn, + LandingPadInst *LPad, BasicBlock *StartBB, + FrameVarInfoMap &VarInfo) { + Module *M = SrcFn->getParent(); + LLVMContext &Context = M->getContext(); + Type *Int8PtrType = Type::getInt8PtrTy(Context); + + // Create a new function to receive the handler contents. + Value *ParentFP; + Function *Handler; + if (Action->getType() == Catch) { + Handler = createHandlerFunc(Int8PtrType, SrcFn->getName() + ".catch", M, + ParentFP); + } else { + Handler = createHandlerFunc(Type::getVoidTy(Context), + SrcFn->getName() + ".cleanup", M, ParentFP); + } + HandlerToParentFP[Handler] = ParentFP; + Handler->addFnAttr("wineh-parent", SrcFn->getName()); + BasicBlock *Entry = &Handler->getEntryBlock(); + + // Generate a standard prolog to setup the frame recovery structure. 
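
outlineHandler, continued below, drives the copy through CloneAndPruneIntoFromInst and a CloningDirector: a per-instruction policy hook that can let an instruction be cloned, skip it because its value was pre-mapped, or terminate the new block early. A std-only sketch of that control shape (toy instruction type; the real director also distinguishes a CloneSuccessors action):

    #include <functional>
    #include <string>
    #include <vector>

    enum class CloneAction { Clone, Skip, StopBlock };

    // Toy block cloner: copies instructions one by one, asking a director
    // what to do with each, the way CloneAndPruneIntoFromInst consults a
    // CloningDirector before emitting every instruction.
    std::vector<std::string>
    cloneBlock(const std::vector<std::string> &Insts,
               const std::function<CloneAction(const std::string &)> &Director) {
      std::vector<std::string> Out;
      for (const std::string &I : Insts) {
        switch (Director(I)) {
        case CloneAction::Clone:
          Out.push_back(I); // default path
          break;
        case CloneAction::Skip:
          break;            // value was pre-mapped (e.g. lpad boilerplate)
        case CloneAction::StopBlock:
          return Out;       // director already terminated the new block
        }
      }
      return Out;
    }

Each handleXXX method in this file is one arm of such a director; the outlining code that follows wires one up and runs the clone.
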
+ IRBuilder<> Builder(Context); + Builder.SetInsertPoint(Entry); + Builder.SetCurrentDebugLocation(LPad->getDebugLoc()); + + std::unique_ptr<WinEHCloningDirectorBase> Director; + + ValueToValueMapTy VMap; + + LandingPadMap &LPadMap = LPadMaps[LPad]; + if (!LPadMap.isInitialized()) + LPadMap.mapLandingPad(LPad); + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + Constant *Sel = CatchAction->getSelector(); + Director.reset(new WinEHCatchDirector(Handler, ParentFP, Sel, VarInfo, + LPadMap, NestedLPtoOriginalLP, DT, + EHBlocks)); + LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType), + ConstantInt::get(Type::getInt32Ty(Context), 1)); + } else { + Director.reset( + new WinEHCleanupDirector(Handler, ParentFP, VarInfo, LPadMap)); + LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType), + UndefValue::get(Type::getInt32Ty(Context))); + } + + SmallVector<ReturnInst *, 8> Returns; + ClonedCodeInfo OutlinedFunctionInfo; + + // If the start block contains PHI nodes, we need to map them. + BasicBlock::iterator II = StartBB->begin(); + while (auto *PN = dyn_cast<PHINode>(II)) { + bool Mapped = false; + // Look for PHI values that we have already mapped (such as the selector). + for (Value *Val : PN->incoming_values()) { + if (VMap.count(Val)) { + VMap[PN] = VMap[Val]; + Mapped = true; + } + } + // If we didn't find a match for this value, map it as an undef. + if (!Mapped) { + VMap[PN] = UndefValue::get(PN->getType()); + } + ++II; + } + + // The landing pad value may be used by PHI nodes. It will ultimately be + // eliminated, but we need it in the map for intermediate handling. + VMap[LPad] = UndefValue::get(LPad->getType()); + + // Skip over PHIs and, if applicable, landingpad instructions. + II = StartBB->getFirstInsertionPt(); + + CloneAndPruneIntoFromInst(Handler, SrcFn, II, VMap, + /*ModuleLevelChanges=*/false, Returns, "", + &OutlinedFunctionInfo, Director.get()); + + // Move all the instructions in the cloned "entry" block into our entry block. + // Depending on how the parent function was laid out, the block that will + // correspond to the outlined entry block may not be the first block in the + // list. We can recognize it, however, as the cloned block which has no + // predecessors. Any other block wouldn't have been cloned if it didn't + // have a predecessor which was also cloned. + Function::iterator ClonedIt = std::next(Function::iterator(Entry)); + while (!pred_empty(ClonedIt)) + ++ClonedIt; + BasicBlock *ClonedEntryBB = ClonedIt; + assert(ClonedEntryBB); + Entry->getInstList().splice(Entry->end(), ClonedEntryBB->getInstList()); + ClonedEntryBB->eraseFromParent(); + + // Make sure we can identify the handler's personality later. + addStubInvokeToHandlerIfNeeded(Handler, LPad->getPersonalityFn()); + + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + WinEHCatchDirector *CatchDirector = + reinterpret_cast<WinEHCatchDirector *>(Director.get()); + CatchAction->setExceptionVar(CatchDirector->getExceptionVar()); + CatchAction->setReturnTargets(CatchDirector->getReturnTargets()); + + // Look for blocks that are not part of the landing pad that we just + // outlined but terminate with a call to llvm.eh.endcatch and a + // branch to a block that is in the handler we just outlined. + // These blocks will be part of a nested landing pad that intends to + // return to an address in this handler. This case is best handled + // after both landing pads have been outlined, so for now we'll just + // save the association of the blocks in LPadTargetBlocks. 
+ // The return instructions which are created from these branches will be
+ // replaced after all landing pads have been outlined.
+ for (const auto MapEntry : VMap) {
+ // VMap maps all values and blocks that were just cloned, but dead
+ // blocks which were pruned will map to nullptr.
+ if (!isa<BasicBlock>(MapEntry.first) || MapEntry.second == nullptr)
+ continue;
+ const BasicBlock *MappedBB = cast<BasicBlock>(MapEntry.first);
+ for (auto *Pred : predecessors(const_cast<BasicBlock *>(MappedBB))) {
+ auto *Branch = dyn_cast<BranchInst>(Pred->getTerminator());
+ if (!Branch || !Branch->isUnconditional() || Pred->size() <= 1)
+ continue;
+ BasicBlock::iterator II = const_cast<BranchInst *>(Branch);
+ --II;
+ if (match(cast<Value>(II), m_Intrinsic<Intrinsic::eh_endcatch>())) {
+ // This would indicate that a nested landing pad wants to return
+ // to a block that is outlined into two different handlers.
+ assert(!LPadTargetBlocks.count(MappedBB));
+ LPadTargetBlocks[MappedBB] = cast<BasicBlock>(MapEntry.second);
+ }
+ }
+ }
+ } // End if (CatchAction)
+
+ Action->setHandlerBlockOrFunc(Handler);
+
+ return true;
+}
+
+/// This BB must end in a selector dispatch. All we need to do is pass the
+/// handler block to llvm.eh.actions and list it as a possible indirectbr
+/// target.
+void WinEHPrepare::processSEHCatchHandler(CatchHandler *CatchAction,
+ BasicBlock *StartBB) {
+ BasicBlock *HandlerBB;
+ BasicBlock *NextBB;
+ Constant *Selector;
+ bool Res = isSelectorDispatch(StartBB, HandlerBB, Selector, NextBB);
+ if (Res) {
+ // If this was EH dispatch, this must be a conditional branch to the handler
+ // block.
+ // FIXME: Handle instructions in the dispatch block. Currently we drop them,
+ // leading to crashes if some optimization hoists stuff here.
+ assert(CatchAction->getSelector() && HandlerBB &&
+ "expected catch EH dispatch");
+ } else {
+ // This must be a catch-all. Split the block after the landingpad.
+ assert(CatchAction->getSelector()->isNullValue() && "expected catch-all");
+ HandlerBB = SplitBlock(StartBB, StartBB->getFirstInsertionPt(), DT);
+ }
+ IRBuilder<> Builder(HandlerBB->getFirstInsertionPt());
+ Function *EHCodeFn = Intrinsic::getDeclaration(
+ StartBB->getParent()->getParent(), Intrinsic::eh_exceptioncode);
+ Value *Code = Builder.CreateCall(EHCodeFn, {}, "sehcode");
+ Code = Builder.CreateIntToPtr(Code, SEHExceptionCodeSlot->getAllocatedType());
+ Builder.CreateStore(Code, SEHExceptionCodeSlot);
+ CatchAction->setHandlerBlockOrFunc(BlockAddress::get(HandlerBB));
+ TinyPtrVector<BasicBlock *> Targets(HandlerBB);
+ CatchAction->setReturnTargets(Targets);
+}
+
+void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) {
+ // Each instance of this class should only ever be used to map a single
+ // landing pad.
+ assert(OriginLPad == nullptr || OriginLPad == LPad);
+
+ // If the landing pad has already been mapped, there's nothing more to do.
+ if (OriginLPad == LPad)
+ return;
+
+ OriginLPad = LPad;
+
+ // The landingpad instruction returns an aggregate value. Typically, its
+ // value will be passed to a pair of extract value instructions and the
+ // results of those extracts will have been promoted to reg values before
+ // this routine is called.
+ for (auto *U : LPad->users()) { + const ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U); + if (!Extract) + continue; + assert(Extract->getNumIndices() == 1 && + "Unexpected operation: extracting both landing pad values"); + unsigned int Idx = *(Extract->idx_begin()); + assert((Idx == 0 || Idx == 1) && + "Unexpected operation: extracting an unknown landing pad element"); + if (Idx == 0) { + ExtractedEHPtrs.push_back(Extract); + } else if (Idx == 1) { + ExtractedSelectors.push_back(Extract); + } + } +} + +bool LandingPadMap::isOriginLandingPadBlock(const BasicBlock *BB) const { + return BB->getLandingPadInst() == OriginLPad; +} + +bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const { + if (Inst == OriginLPad) + return true; + for (auto *Extract : ExtractedEHPtrs) { + if (Inst == Extract) + return true; + } + for (auto *Extract : ExtractedSelectors) { + if (Inst == Extract) + return true; + } + return false; +} + +void LandingPadMap::remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, + Value *SelectorValue) const { + // Remap all landing pad extract instructions to the specified values. + for (auto *Extract : ExtractedEHPtrs) + VMap[Extract] = EHPtrValue; + for (auto *Extract : ExtractedSelectors) + VMap[Extract] = SelectorValue; +} + +static bool isFrameAddressCall(const Value *V) { + return match(const_cast<Value *>(V), + m_Intrinsic<Intrinsic::frameaddress>(m_SpecificInt(0))); +} + +CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( + ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { + // If this is one of the boilerplate landing pad instructions, skip it. + // The instruction will have already been remapped in VMap. + if (LPadMap.isLandingPadSpecificInst(Inst)) + return CloningDirector::SkipInstruction; + + // Nested landing pads that have not already been outlined will be cloned as + // stubs, with just the landingpad instruction and an unreachable instruction. + // When all landingpads have been outlined, we'll replace this with the + // llvm.eh.actions call and indirect branch created when the landing pad was + // outlined. + if (auto *LPad = dyn_cast<LandingPadInst>(Inst)) { + return handleLandingPad(VMap, LPad, NewBB); + } + + // Nested landing pads that have already been outlined will be cloned in their + // outlined form, but we need to intercept the ibr instruction to filter out + // targets that do not return to the handler we are outlining. + if (auto *IBr = dyn_cast<IndirectBrInst>(Inst)) { + return handleIndirectBr(VMap, IBr, NewBB); + } + + if (auto *Invoke = dyn_cast<InvokeInst>(Inst)) + return handleInvoke(VMap, Invoke, NewBB); + + if (auto *Resume = dyn_cast<ResumeInst>(Inst)) + return handleResume(VMap, Resume, NewBB); + + if (auto *Cmp = dyn_cast<CmpInst>(Inst)) + return handleCompare(VMap, Cmp, NewBB); + + if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>())) + return handleBeginCatch(VMap, Inst, NewBB); + if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) + return handleEndCatch(VMap, Inst, NewBB); + if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) + return handleTypeIdFor(VMap, Inst, NewBB); + + // When outlining llvm.frameaddress(i32 0), remap that to the second argument, + // which is the FP of the parent. + if (isFrameAddressCall(Inst)) { + VMap[Inst] = ParentFP; + return CloningDirector::SkipInstruction; + } + + // Continue with the default cloning behavior. 
+ return CloningDirector::CloneInstruction; +} + +CloningDirector::CloningAction WinEHCatchDirector::handleLandingPad( + ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) { + // If the instruction after the landing pad is a call to llvm.eh.actions + // the landing pad has already been outlined. In this case, we should + // clone it because it may return to a block in the handler we are + // outlining now that would otherwise be unreachable. The landing pads + // are sorted before outlining begins to enable this case to work + // properly. + const Instruction *NextI = LPad->getNextNode(); + if (match(NextI, m_Intrinsic<Intrinsic::eh_actions>())) + return CloningDirector::CloneInstruction; + + // If the landing pad hasn't been outlined yet, the landing pad we are + // outlining now does not dominate it and so it cannot return to a block + // in this handler. In that case, we can just insert a stub landing + // pad now and patch it up later. + Instruction *NewInst = LPad->clone(); + if (LPad->hasName()) + NewInst->setName(LPad->getName()); + // Save this correlation for later processing. + NestedLPtoOriginalLP[cast<LandingPadInst>(NewInst)] = LPad; + VMap[LPad] = NewInst; + BasicBlock::InstListType &InstList = NewBB->getInstList(); + InstList.push_back(NewInst); + InstList.push_back(new UnreachableInst(NewBB->getContext())); + return CloningDirector::StopCloningBB; +} + +CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch( + ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { + // The argument to the call is some form of the first element of the + // landingpad aggregate value, but that doesn't matter. It isn't used + // here. + // The second argument is an outparameter where the exception object will be + // stored. Typically the exception object is a scalar, but it can be an + // aggregate when catching by value. + // FIXME: Leave something behind to indicate where the exception object lives + // for this handler. Should it be part of llvm.eh.actions? + assert(ExceptionObjectVar == nullptr && "Multiple calls to " + "llvm.eh.begincatch found while " + "outlining catch handler."); + ExceptionObjectVar = Inst->getOperand(1)->stripPointerCasts(); + if (isa<ConstantPointerNull>(ExceptionObjectVar)) + return CloningDirector::SkipInstruction; + assert(cast<AllocaInst>(ExceptionObjectVar)->isStaticAlloca() && + "catch parameter is not static alloca"); + Materializer.escapeCatchObject(ExceptionObjectVar); + return CloningDirector::SkipInstruction; +} + +CloningDirector::CloningAction +WinEHCatchDirector::handleEndCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, BasicBlock *NewBB) { + auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst); + // It might be interesting to track whether or not we are inside a catch + // function, but that might make the algorithm more brittle than it needs + // to be. + + // The end catch call can occur in one of two places: either in a + // landingpad block that is part of the catch handlers exception mechanism, + // or at the end of the catch block. However, a catch-all handler may call + // end catch from the original landing pad. If the call occurs in a nested + // landing pad block, we must skip it and continue so that the landing pad + // gets cloned. 
+ auto *ParentBB = IntrinCall->getParent(); + if (ParentBB->isLandingPad() && !LPadMap.isOriginLandingPadBlock(ParentBB)) + return CloningDirector::SkipInstruction; + + // If an end catch occurs anywhere else we want to terminate the handler + // with a return to the code that follows the endcatch call. If the + // next instruction is not an unconditional branch, we need to split the + // block to provide a clear target for the return instruction. + BasicBlock *ContinueBB; + auto Next = std::next(BasicBlock::const_iterator(IntrinCall)); + const BranchInst *Branch = dyn_cast<BranchInst>(Next); + if (!Branch || !Branch->isUnconditional()) { + // We're interrupting the cloning process at this location, so the + // const_cast we're doing here will not cause a problem. + ContinueBB = SplitBlock(const_cast<BasicBlock *>(ParentBB), + const_cast<Instruction *>(cast<Instruction>(Next))); + } else { + ContinueBB = Branch->getSuccessor(0); + } + + ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueBB), NewBB); + ReturnTargets.push_back(ContinueBB); + + // We just added a terminator to the cloned block. + // Tell the caller to stop processing the current basic block so that + // the branch instruction will be skipped. + return CloningDirector::StopCloningBB; +} + +CloningDirector::CloningAction WinEHCatchDirector::handleTypeIdFor( + ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { + auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst); + Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts(); + // This causes a replacement that will collapse the landing pad CFG based + // on the filter function we intend to match. + if (Selector == CurrentSelector) + VMap[Inst] = ConstantInt::get(SelectorIDType, 1); + else + VMap[Inst] = ConstantInt::get(SelectorIDType, 0); + // Tell the caller not to clone this instruction. + return CloningDirector::SkipInstruction; +} + +CloningDirector::CloningAction WinEHCatchDirector::handleIndirectBr( + ValueToValueMapTy &VMap, + const IndirectBrInst *IBr, + BasicBlock *NewBB) { + // If this indirect branch is not part of a landing pad block, just clone it. + const BasicBlock *ParentBB = IBr->getParent(); + if (!ParentBB->isLandingPad()) + return CloningDirector::CloneInstruction; + + // If it is part of a landing pad, we want to filter out target blocks + // that are not part of the handler we are outlining. + const LandingPadInst *LPad = ParentBB->getLandingPadInst(); + + // Save this correlation for later processing. + NestedLPtoOriginalLP[cast<LandingPadInst>(VMap[LPad])] = LPad; + + // We should only get here for landing pads that have already been outlined. + assert(match(LPad->getNextNode(), m_Intrinsic<Intrinsic::eh_actions>())); + + // Copy the indirectbr, but only include targets that were previously + // identified as EH blocks and are dominated by the nested landing pad. 
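
The filter the comment above describes keeps a cloned target only if it passes two tests: membership in the EH block set and domination by the nested landing pad. A minimal std-only sketch of that filter-then-rebuild step, with dominates() standing in for DominatorTree::dominates:

    #include <set>
    #include <string>
    #include <vector>

    // Rebuild a branch's target list, keeping only targets that satisfy
    // both a membership check (EHBlocks) and a dominance check, as the
    // cloned indirectbr handling does. 'dominates' is a placeholder for
    // the real dominance query.
    std::vector<std::string>
    filterTargets(const std::vector<std::string> &Targets,
                  const std::set<std::string> &EHBlocks,
                  const std::string &ParentBB,
                  bool (*dominates)(const std::string &, const std::string &)) {
      std::vector<std::string> Kept;
      for (const std::string &T : Targets)
        if (EHBlocks.count(T) && dominates(ParentBB, T))
          Kept.push_back(T); // becomes a destination of the new indirectbr
      return Kept;
    }

The real loop over the indirectbr's destinations follows.
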
+ SetVector<const BasicBlock *> ReturnTargets; + for (int I = 0, E = IBr->getNumDestinations(); I < E; ++I) { + auto *TargetBB = IBr->getDestination(I); + if (EHBlocks.count(const_cast<BasicBlock*>(TargetBB)) && + DT->dominates(ParentBB, TargetBB)) { + DEBUG(dbgs() << " Adding destination " << TargetBB->getName() << "\n"); + ReturnTargets.insert(TargetBB); + } + } + IndirectBrInst *NewBranch = + IndirectBrInst::Create(const_cast<Value *>(IBr->getAddress()), + ReturnTargets.size(), NewBB); + for (auto *Target : ReturnTargets) + NewBranch->addDestination(const_cast<BasicBlock*>(Target)); + + // The operands and targets of the branch instruction are remapped later + // because it is a terminator. Tell the cloning code to clone the + // blocks we just added to the target list. + return CloningDirector::CloneSuccessors; +} + +CloningDirector::CloningAction +WinEHCatchDirector::handleInvoke(ValueToValueMapTy &VMap, + const InvokeInst *Invoke, BasicBlock *NewBB) { + return CloningDirector::CloneInstruction; +} + +CloningDirector::CloningAction +WinEHCatchDirector::handleResume(ValueToValueMapTy &VMap, + const ResumeInst *Resume, BasicBlock *NewBB) { + // Resume instructions shouldn't be reachable from catch handlers. + // We still need to handle it, but it will be pruned. + BasicBlock::InstListType &InstList = NewBB->getInstList(); + InstList.push_back(new UnreachableInst(NewBB->getContext())); + return CloningDirector::StopCloningBB; +} + +CloningDirector::CloningAction +WinEHCatchDirector::handleCompare(ValueToValueMapTy &VMap, + const CmpInst *Compare, BasicBlock *NewBB) { + const IntrinsicInst *IntrinCall = nullptr; + if (match(Compare->getOperand(0), m_Intrinsic<Intrinsic::eh_typeid_for>())) { + IntrinCall = dyn_cast<IntrinsicInst>(Compare->getOperand(0)); + } else if (match(Compare->getOperand(1), + m_Intrinsic<Intrinsic::eh_typeid_for>())) { + IntrinCall = dyn_cast<IntrinsicInst>(Compare->getOperand(1)); + } + if (IntrinCall) { + Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts(); + // This causes a replacement that will collapse the landing pad CFG based + // on the filter function we intend to match. + if (Selector == CurrentSelector->stripPointerCasts()) { + VMap[Compare] = ConstantInt::get(SelectorIDType, 1); + } else { + VMap[Compare] = ConstantInt::get(SelectorIDType, 0); + } + return CloningDirector::SkipInstruction; + } + return CloningDirector::CloneInstruction; +} + +CloningDirector::CloningAction WinEHCleanupDirector::handleLandingPad( + ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) { + // The MS runtime will terminate the process if an exception occurs in a + // cleanup handler, so we shouldn't encounter landing pads in the actual + // cleanup code, but they may appear in catch blocks. Depending on where + // we started cloning we may see one, but it will get dropped during dead + // block pruning. + Instruction *NewInst = new UnreachableInst(NewBB->getContext()); + VMap[LPad] = NewInst; + BasicBlock::InstListType &InstList = NewBB->getInstList(); + InstList.push_back(NewInst); + return CloningDirector::StopCloningBB; +} + +CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch( + ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { + // Cleanup code may flow into catch blocks or the catch block may be part + // of a branch that will be optimized away. We'll insert a return + // instruction now, but it may be pruned before the cloning process is + // complete. 
+ ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleEndCatch(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // Cleanup handlers nested within catch handlers may begin with a call to
+ // eh.endcatch. We can just ignore that instruction.
+ return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleTypeIdFor(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // If we encounter a selector comparison while cloning a cleanup handler,
+ // we want to stop cloning immediately. Anything after the dispatch
+ // will be outlined into a different handler.
+ BasicBlock *CatchHandler;
+ Constant *Selector;
+ BasicBlock *NextBB;
+ if (isSelectorDispatch(const_cast<BasicBlock *>(Inst->getParent()),
+ CatchHandler, Selector, NextBB)) {
+ ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
+ return CloningDirector::StopCloningBB;
+ }
+ // If eh.typeid.for is called for any other reason, it can be ignored.
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
+ return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleIndirectBr(
+ ValueToValueMapTy &VMap,
+ const IndirectBrInst *IBr,
+ BasicBlock *NewBB) {
+ // No special handling is required for cleanup cloning.
+ return CloningDirector::CloneInstruction;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke(
+ ValueToValueMapTy &VMap, const InvokeInst *Invoke, BasicBlock *NewBB) {
+ // All invokes in cleanup handlers can be replaced with calls.
+ SmallVector<Value *, 16> CallArgs(Invoke->op_begin(), Invoke->op_end() - 3);
+ // Insert a normal call instruction...
+ CallInst *NewCall =
+ CallInst::Create(const_cast<Value *>(Invoke->getCalledValue()), CallArgs,
+ Invoke->getName(), NewBB);
+ NewCall->setCallingConv(Invoke->getCallingConv());
+ NewCall->setAttributes(Invoke->getAttributes());
+ NewCall->setDebugLoc(Invoke->getDebugLoc());
+ VMap[Invoke] = NewCall;
+
+ // Remap the operands.
+ llvm::RemapInstruction(NewCall, VMap, RF_None, nullptr, &Materializer);
+
+ // Insert an unconditional branch to the normal destination.
+ BranchInst::Create(Invoke->getNormalDest(), NewBB);
+
+ // The unwind destination won't be cloned into the new function, so
+ // we don't need to clean up its phi nodes.
+
+ // We just added a terminator to the cloned block.
+ // Tell the caller to stop processing the current basic block.
+ return CloningDirector::CloneSuccessors;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleResume(
+ ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) {
+ ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
+
+ // We just added a terminator to the cloned block.
+ // Tell the caller to stop processing the current basic block so that
+ // the branch instruction will be skipped.
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction
+WinEHCleanupDirector::handleCompare(ValueToValueMapTy &VMap,
+ const CmpInst *Compare, BasicBlock *NewBB) {
+ if (match(Compare->getOperand(0), m_Intrinsic<Intrinsic::eh_typeid_for>()) ||
+ match(Compare->getOperand(1), m_Intrinsic<Intrinsic::eh_typeid_for>())) {
+ VMap[Compare] = ConstantInt::get(SelectorIDType, 1);
+ return CloningDirector::SkipInstruction;
+ }
+ return CloningDirector::CloneInstruction;
+}
+
+WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer(
+ Function *OutlinedFn, Value *ParentFP, FrameVarInfoMap &FrameVarInfo)
+ : FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) {
+ BasicBlock *EntryBB = &OutlinedFn->getEntryBlock();
+
+ // New allocas should be inserted in the entry block, but after the parent FP
+ // is established if it is an instruction.
+ Instruction *InsertPoint = EntryBB->getFirstInsertionPt();
+ if (auto *FPInst = dyn_cast<Instruction>(ParentFP))
+ InsertPoint = FPInst->getNextNode();
+ Builder.SetInsertPoint(EntryBB, InsertPoint);
+}
+
+Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) {
+ // If we're asked to materialize a static alloca, we temporarily create an
+ // alloca in the outlined function and add this to the FrameVarInfo map. When
+ // all the outlining is complete, we'll replace these temporary allocas with
+ // calls to llvm.framerecover.
+ if (auto *AV = dyn_cast<AllocaInst>(V)) {
+ assert(AV->isStaticAlloca() &&
+ "cannot materialize un-demoted dynamic alloca");
+ AllocaInst *NewAlloca = dyn_cast<AllocaInst>(AV->clone());
+ Builder.Insert(NewAlloca, AV->getName());
+ FrameVarInfo[AV].push_back(NewAlloca);
+ return NewAlloca;
+ }
+
+ if (isa<Instruction>(V) || isa<Argument>(V)) {
+ Function *Parent = isa<Instruction>(V)
+ ? cast<Instruction>(V)->getParent()->getParent()
+ : cast<Argument>(V)->getParent();
+ errs()
+ << "Failed to demote instruction used in exception handler of function "
+ << GlobalValue::getRealLinkageName(Parent->getName()) << ":\n";
+ errs() << " " << *V << '\n';
+ report_fatal_error("WinEHPrepare failed to demote instruction");
+ }
+
+ // Don't materialize other values.
+ return nullptr;
+}
+
+void WinEHFrameVariableMaterializer::escapeCatchObject(Value *V) {
+ // Catch parameter objects have to live in the parent frame. When we see a use
+ // of a catch parameter, add a sentinel to the multimap to indicate that it's
+ // used from another handler. This will prevent us from trying to sink the
+ // alloca into the handler and ensure that the catch parameter is present in
+ // the call to llvm.frameescape.
+ FrameVarInfo[V].push_back(getCatchObjectSentinel());
+}
+
+// This function maps the catch and cleanup handlers that are reachable from the
+// specified landing pad. The landing pad sequence will have this basic shape:
+//
+// <cleanup handler>
+// <selector comparison>
+// <catch handler>
+// <cleanup handler>
+// <selector comparison>
+// <catch handler>
+// <cleanup handler>
+// ...
+//
+// Any of the cleanup slots may be absent. The cleanup slots may be occupied by
+// any arbitrary control flow, but all paths through the cleanup code must
+// eventually reach the next selector comparison and no path can skip to a
+// different selector comparison, though some paths may terminate abnormally.
+// Therefore, we will use a depth first search from the start of any given
+// cleanup block and stop searching when we find the next selector comparison.
+// +// If the landingpad instruction does not have a catch clause, we will assume +// that any instructions other than selector comparisons and catch handlers can +// be ignored. In practice, these will only be the boilerplate instructions. +// +// The catch handlers may also have any control structure, but we are only +// interested in the start of the catch handlers, so we don't need to actually +// follow the flow of the catch handlers. The start of the catch handlers can +// be located from the compare instructions, but they can be skipped in the +// flow by following the contrary branch. +void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad, + LandingPadActions &Actions) { + unsigned int NumClauses = LPad->getNumClauses(); + unsigned int HandlersFound = 0; + BasicBlock *BB = LPad->getParent(); + + DEBUG(dbgs() << "Mapping landing pad: " << BB->getName() << "\n"); + + if (NumClauses == 0) { + findCleanupHandlers(Actions, BB, nullptr); + return; + } + + VisitedBlockSet VisitedBlocks; + + while (HandlersFound != NumClauses) { + BasicBlock *NextBB = nullptr; + + // Skip over filter clauses. + if (LPad->isFilter(HandlersFound)) { + ++HandlersFound; + continue; + } + + // See if the clause we're looking for is a catch-all. + // If so, the catch begins immediately. + Constant *ExpectedSelector = + LPad->getClause(HandlersFound)->stripPointerCasts(); + if (isa<ConstantPointerNull>(ExpectedSelector)) { + // The catch all must occur last. + assert(HandlersFound == NumClauses - 1); + + // There can be additional selector dispatches in the call chain that we + // need to ignore. + BasicBlock *CatchBlock = nullptr; + Constant *Selector; + while (BB && isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) { + DEBUG(dbgs() << " Found extra catch dispatch in block " + << CatchBlock->getName() << "\n"); + BB = NextBB; + } + + // Add the catch handler to the action list. + CatchHandler *Action = nullptr; + if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) { + // If the CatchHandlerMap already has an entry for this BB, re-use it. + Action = CatchHandlerMap[BB]; + assert(Action->getSelector() == ExpectedSelector); + } else { + // We don't expect a selector dispatch, but there may be a call to + // llvm.eh.begincatch, which separates catch handling code from + // cleanup code in the same control flow. This call looks for the + // begincatch intrinsic. + Action = findCatchHandler(BB, NextBB, VisitedBlocks); + if (Action) { + // For C++ EH, check if there is any interesting cleanup code before + // we begin the catch. This is important because cleanups cannot + // rethrow exceptions but code called from catches can. For SEH, it + // isn't important if some finally code before a catch-all is executed + // out of line or after recovering from the exception. + if (Personality == EHPersonality::MSVC_CXX) + findCleanupHandlers(Actions, BB, BB); + } else { + // If an action was not found, it means that the control flows + // directly into the catch-all handler and there is no cleanup code. + // That's an expected situation and we must create a catch action. + // Since this is a catch-all handler, the selector won't actually + // appear in the code anywhere. ExpectedSelector here is the constant + // null ptr that we got from the landing pad instruction. 
+ Action = new CatchHandler(BB, ExpectedSelector, nullptr);
+ CatchHandlerMap[BB] = Action;
+ }
+ }
+ Actions.insertCatchHandler(Action);
+ DEBUG(dbgs() << " Catch all handler at block " << BB->getName() << "\n");
+ ++HandlersFound;
+
+ // Once we reach a catch-all, don't expect to hit a resume instruction.
+ BB = nullptr;
+ break;
+ }
+
+ CatchHandler *CatchAction = findCatchHandler(BB, NextBB, VisitedBlocks);
+ assert(CatchAction);
+
+ // See if there is any interesting code executed before the dispatch.
+ findCleanupHandlers(Actions, BB, CatchAction->getStartBlock());
+
+ // When the source program contains multiple nested try blocks the catch
+ // handlers can get strung together in such a way that we can encounter
+ // a dispatch for a selector that we've already had a handler for.
+ if (CatchAction->getSelector()->stripPointerCasts() == ExpectedSelector) {
+ ++HandlersFound;
+
+ // Add the catch handler to the action list.
+ DEBUG(dbgs() << " Found catch dispatch in block "
+ << CatchAction->getStartBlock()->getName() << "\n");
+ Actions.insertCatchHandler(CatchAction);
+ } else {
+ // Under some circumstances optimized IR will flow unconditionally into a
+ // handler block without checking the selector. This can only happen if
+ // the landing pad has a catch-all handler and the handler for the
+ // preceding catch clause is identical to the catch-all handler
+ // (typically an empty catch). In this case, the handler must be shared
+ // by all remaining clauses.
+ if (isa<ConstantPointerNull>(
+ CatchAction->getSelector()->stripPointerCasts())) {
+ DEBUG(dbgs() << " Applying early catch-all handler in block "
+ << CatchAction->getStartBlock()->getName()
+ << " to all remaining clauses.\n");
+ Actions.insertCatchHandler(CatchAction);
+ return;
+ }
+
+ DEBUG(dbgs() << " Found extra catch dispatch in block "
+ << CatchAction->getStartBlock()->getName() << "\n");
+ }
+
+ // Move on to the block after the catch handler.
+ BB = NextBB;
+ }
+
+ // If we didn't wind up in a catch-all, see if there is any interesting code
+ // executed before the resume.
+ findCleanupHandlers(Actions, BB, BB);
+
+ // It's possible that some optimization moved code into a landingpad that
+ // wasn't previously being used for cleanup. If that happens, we need to
+ // execute that extra code from a cleanup handler.
+ if (Actions.includesCleanup() && !LPad->isCleanup())
+ LPad->setCleanup(true);
+}
+
+// This function searches starting with the input block for the next
+// block that terminates with a branch whose condition is based on a selector
+// comparison. This may be the input block. See the mapLandingPadBlocks
+// comments for a discussion of control flow assumptions.
+//
+CatchHandler *WinEHPrepare::findCatchHandler(BasicBlock *BB,
+ BasicBlock *&NextBB,
+ VisitedBlockSet &VisitedBlocks) {
+ // See if we've already found a catch handler for this block; if so, use it.
+ // Call count() first to avoid creating a null entry for blocks
+ // we haven't seen before.
+ if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) {
+ CatchHandler *Action = cast<CatchHandler>(CatchHandlerMap[BB]);
+ NextBB = Action->getNextBB();
+ return Action;
+ }
+
+ // VisitedBlocks applies only to the current search. We still
+ // need to consider blocks that we've visited while mapping other
+ // landing pads.
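
findCatchHandler pairs a persistent cache (CatchHandlerMap, shared across queries) with a per-search visited set (VisitedBlocks), exactly the split the comment above spells out. A std-only sketch of that two-level search, with isDispatch standing in for the selector-dispatch test:

    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    using Graph = std::map<std::string, std::vector<std::string>>;

    // Memoized DFS: Cache persists across searches (CatchHandlerMap's role),
    // Visited guards only the current walk (VisitedBlockSet's role).
    const std::string *findDispatch(const Graph &G, const std::string &BB,
                                    std::set<std::string> &Visited,
                                    std::map<std::string, std::string> &Cache,
                                    bool (*isDispatch)(const std::string &)) {
      if (auto It = Cache.find(BB); It != Cache.end())
        return &It->second;              // reuse an earlier search's answer
      if (!Visited.insert(BB).second)
        return nullptr;                  // cycle within this search
      if (isDispatch(BB))
        return &(Cache[BB] = BB);        // cache the positive hit
      if (auto It = G.find(BB); It != G.end())
        for (const std::string &Succ : It->second)
          if (const std::string *R =
                  findDispatch(G, Succ, Visited, Cache, isDispatch))
            return R;
      return nullptr;
    }

With that shape in mind, the walk itself continues below.
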
+  VisitedBlocks.insert(BB);
+
+  BasicBlock *CatchBlock = nullptr;
+  Constant *Selector = nullptr;
+
+  // If this is the first time we've visited this block from any landing pad,
+  // look to see if it is a selector dispatch block.
+  if (!CatchHandlerMap.count(BB)) {
+    if (isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) {
+      CatchHandler *Action = new CatchHandler(BB, Selector, NextBB);
+      CatchHandlerMap[BB] = Action;
+      return Action;
+    }
+    // If we encounter a block containing an llvm.eh.begincatch before we
+    // find a selector dispatch block, the handler is assumed to be
+    // reached unconditionally. This happens for catch-all blocks, but
+    // it can also happen for other catch handlers that have been combined
+    // with the catch-all handler during optimization.
+    if (isCatchBlock(BB)) {
+      PointerType *Int8PtrTy = Type::getInt8PtrTy(BB->getContext());
+      Constant *NullSelector = ConstantPointerNull::get(Int8PtrTy);
+      CatchHandler *Action = new CatchHandler(BB, NullSelector, nullptr);
+      CatchHandlerMap[BB] = Action;
+      return Action;
+    }
+  }
+
+  // Visit each successor, looking for the dispatch.
+  // FIXME: We expect to find the dispatch quickly, so this will probably
+  // work better as a breadth first search.
+  for (BasicBlock *Succ : successors(BB)) {
+    if (VisitedBlocks.count(Succ))
+      continue;
+
+    CatchHandler *Action = findCatchHandler(Succ, NextBB, VisitedBlocks);
+    if (Action)
+      return Action;
+  }
+  return nullptr;
+}
+
+// These are helper functions to combine repeated code from findCleanupHandlers.
+static void createCleanupHandler(LandingPadActions &Actions,
+                                 CleanupHandlerMapTy &CleanupHandlerMap,
+                                 BasicBlock *BB) {
+  CleanupHandler *Action = new CleanupHandler(BB);
+  CleanupHandlerMap[BB] = Action;
+  Actions.insertCleanupHandler(Action);
+  DEBUG(dbgs() << "  Found cleanup code in block "
+               << Action->getStartBlock()->getName() << "\n");
+}
+
+static CallSite matchOutlinedFinallyCall(BasicBlock *BB,
+                                         Instruction *MaybeCall) {
+  // Look for finally blocks that Clang has already outlined for us.
+  //   %fp = call i8* @llvm.frameaddress(i32 0)
+  //   call void @"fin$parent"(iN 1, i8* %fp)
+  if (isFrameAddressCall(MaybeCall) && MaybeCall != BB->getTerminator())
+    MaybeCall = MaybeCall->getNextNode();
+  CallSite FinallyCall(MaybeCall);
+  if (!FinallyCall || FinallyCall.arg_size() != 2)
+    return CallSite();
+  if (!match(FinallyCall.getArgument(0), m_SpecificInt(1)))
+    return CallSite();
+  if (!isFrameAddressCall(FinallyCall.getArgument(1)))
+    return CallSite();
+  return FinallyCall;
+}
+
+static BasicBlock *followSingleUnconditionalBranches(BasicBlock *BB) {
+  // Skip blocks that contain only a single unconditional branch.
+  while (BB->getFirstNonPHIOrDbg() == BB->getTerminator()) {
+    auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
+    if (Br && Br->isUnconditional())
+      BB = Br->getSuccessor(0);
+    else
+      return BB;
+  }
+  return BB;
+}
+
+// This function searches starting with the input block for the next block that
+// contains code that is not part of a catch handler and would not be
+// eliminated during handler outlining.
+//
+void WinEHPrepare::findCleanupHandlers(LandingPadActions &Actions,
+                                       BasicBlock *StartBB, BasicBlock *EndBB) {
+  // Here we will skip over the following:
+  //
+  //   Landing pad prolog
+  //
+  //   Unconditional branches
+  //
+  //   Selector dispatch
+  //
+  //   Resume pattern
+  //
+  // Anything else marks the start of an interesting block.
+
+  BasicBlock *BB = StartBB;
+  // Anything other than an unconditional branch will kick us out of this loop
+  // one way or another.
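+  // As a sketch of what counts as "interesting" (hypothetical IR; the names
+  // @SomeDtor and %o are illustrative only), a block such as:
+  //
+  //   ehcleanup:
+  //     call void @SomeDtor(%struct.Obj* %o)
+  //     br label %eh.resume
+  //
+  // contains a call that is neither landing pad boilerplate, a selector
+  // dispatch, nor part of a resume pattern, so the loop below records it as
+  // the start of cleanup code.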
+  while (BB) {
+    BB = followSingleUnconditionalBranches(BB);
+    // If we've already scanned this block, don't scan it again. If it is
+    // a cleanup block, there will be an action in the CleanupHandlerMap.
+    // If we've scanned it and it is not a cleanup block, there will be a
+    // nullptr in the CleanupHandlerMap. If we have not scanned it, there will
+    // be no entry in the CleanupHandlerMap. We must call count() first to
+    // avoid creating a null entry for blocks we haven't scanned.
+    if (CleanupHandlerMap.count(BB)) {
+      if (auto *Action = CleanupHandlerMap[BB]) {
+        Actions.insertCleanupHandler(Action);
+        DEBUG(dbgs() << "  Found cleanup code in block "
+                     << Action->getStartBlock()->getName() << "\n");
+        // FIXME: This cleanup might chain into another, and we need to
+        // discover that.
+        return;
+      } else {
+        // Here we handle the case where the cleanup handler map contains a
+        // value for this block but the value is a nullptr. This means that
+        // we have previously analyzed the block and determined that it did
+        // not contain any cleanup code. Based on the earlier analysis, we
+        // know that the block must end in either an unconditional branch, a
+        // resume, or a conditional branch that is predicated on a comparison
+        // with a selector. Either the resume or the selector dispatch
+        // would terminate the search for cleanup code, so the unconditional
+        // branch is the only case for which we might need to continue
+        // searching.
+        BasicBlock *SuccBB = followSingleUnconditionalBranches(BB);
+        if (SuccBB == BB || SuccBB == EndBB)
+          return;
+        BB = SuccBB;
+        continue;
+      }
+    }
+
+    // Create an entry in the cleanup handler map for this block. Initially
+    // we create an entry that says this isn't a cleanup block. If we find
+    // cleanup code, the caller will replace this entry.
+    CleanupHandlerMap[BB] = nullptr;
+
+    TerminatorInst *Terminator = BB->getTerminator();
+
+    // Landing pad blocks have extra instructions we need to accept.
+    LandingPadMap *LPadMap = nullptr;
+    if (BB->isLandingPad()) {
+      LandingPadInst *LPad = BB->getLandingPadInst();
+      LPadMap = &LPadMaps[LPad];
+      if (!LPadMap->isInitialized())
+        LPadMap->mapLandingPad(LPad);
+    }
+
+    // Look for the bare resume pattern:
+    //   %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn, 0
+    //   %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel, 1
+    //   resume { i8*, i32 } %lpad.val2
+    if (auto *Resume = dyn_cast<ResumeInst>(Terminator)) {
+      InsertValueInst *Insert1 = nullptr;
+      InsertValueInst *Insert2 = nullptr;
+      Value *ResumeVal = Resume->getOperand(0);
+      // If the resume value isn't a phi or landingpad value, it should be a
+      // series of insertions. Identify them so we can avoid them when scanning
+      // for cleanups.
+      if (!isa<PHINode>(ResumeVal) && !isa<LandingPadInst>(ResumeVal)) {
+        Insert2 = dyn_cast<InsertValueInst>(ResumeVal);
+        if (!Insert2)
+          return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+        Insert1 = dyn_cast<InsertValueInst>(Insert2->getAggregateOperand());
+        if (!Insert1)
+          return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+      }
+      for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+           II != IE; ++II) {
+        Instruction *Inst = II;
+        if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+          continue;
+        if (Inst == Insert1 || Inst == Insert2 || Inst == Resume)
+          continue;
+        if (!Inst->hasOneUse() ||
+            (Inst->user_back() != Insert1 && Inst->user_back() != Insert2)) {
+          return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+        }
+      }
+      return;
+    }
+
+    BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
+    if (Branch && Branch->isConditional()) {
+      // Look for the selector dispatch.
+      //   %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*))
+      //   %matches = icmp eq i32 %sel, %2
+      //   br i1 %matches, label %catch14, label %eh.resume
+      CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition());
+      if (!Compare || !Compare->isEquality())
+        return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+      for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+           II != IE; ++II) {
+        Instruction *Inst = II;
+        if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+          continue;
+        if (Inst == Compare || Inst == Branch)
+          continue;
+        if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
+          continue;
+        return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+      }
+      // The selector dispatch block should always terminate our search.
+      assert(BB == EndBB);
+      return;
+    }
+
+    if (isAsynchronousEHPersonality(Personality)) {
+      // If this is a landing pad block, skip past the landing-pad-specific
+      // instructions to find the first call that might be an outlined
+      // finally call.
+      Instruction *MaybeCall = BB->getFirstNonPHIOrDbg();
+      if (LPadMap) {
+        while (MaybeCall != BB->getTerminator() &&
+               LPadMap->isLandingPadSpecificInst(MaybeCall))
+          MaybeCall = MaybeCall->getNextNode();
+      }
+
+      // Look for outlined finally calls.
+      if (CallSite FinallyCall = matchOutlinedFinallyCall(BB, MaybeCall)) {
+        Function *Fin = FinallyCall.getCalledFunction();
+        assert(Fin && "outlined finally call should be direct");
+        auto *Action = new CleanupHandler(BB);
+        Action->setHandlerBlockOrFunc(Fin);
+        Actions.insertCleanupHandler(Action);
+        CleanupHandlerMap[BB] = Action;
+        DEBUG(dbgs() << "  Found frontend-outlined finally call to "
+                     << Fin->getName() << " in block "
+                     << Action->getStartBlock()->getName() << "\n");
+
+        // Split the block if there were more interesting instructions and
+        // look for finally calls in the normal successor block.
+        BasicBlock *SuccBB = BB;
+        if (FinallyCall.getInstruction() != BB->getTerminator() &&
+            FinallyCall.getInstruction()->getNextNode() !=
+                BB->getTerminator()) {
+          SuccBB =
+              SplitBlock(BB, FinallyCall.getInstruction()->getNextNode(), DT);
+        } else {
+          if (FinallyCall.isInvoke()) {
+            SuccBB =
+                cast<InvokeInst>(FinallyCall.getInstruction())->getNormalDest();
+          } else {
+            SuccBB = BB->getUniqueSuccessor();
+            assert(SuccBB &&
+                   "splitOutlinedFinallyCalls didn't insert a branch");
+          }
+        }
+        BB = SuccBB;
+        if (BB == EndBB)
+          return;
+        continue;
+      }
+    }
+
+    // Anything else is either a catch block or interesting cleanup code.
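+    // For example (hypothetical IR; %exn, %obj and the block name are
+    // illustrative), a block that begins a catch handler:
+    //
+    //   catch:
+    //     %exn = load i8*, i8** %exn.slot
+    //     call void @llvm.eh.begincatch(i8* %exn, i8* %obj)
+    //
+    // is recognized by the begincatch check below, and no cleanup handler is
+    // created for it.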
+    for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+         II != IE; ++II) {
+      Instruction *Inst = II;
+      if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+        continue;
+      // Unconditional branches fall through to this loop.
+      if (Inst == Branch)
+        continue;
+      // If this is a catch block, there is no cleanup code to be found.
+      if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>()))
+        return;
+      // If this is a nested landing pad, it may contain an endcatch call.
+      if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
+        return;
+      // Anything else makes this interesting cleanup code.
+      return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+    }
+
+    // Only unconditional branches in empty blocks should get this far.
+    assert(Branch && Branch->isUnconditional());
+    if (BB == EndBB)
+      return;
+    BB = Branch->getSuccessor(0);
+  }
+}
+
+// This is a public function, declared in WinEHFuncInfo.h, and is also
+// referenced by WinEHNumbering in FunctionLoweringInfo.cpp.
+void llvm::parseEHActions(
+    const IntrinsicInst *II,
+    SmallVectorImpl<std::unique_ptr<ActionHandler>> &Actions) {
+  for (unsigned I = 0, E = II->getNumArgOperands(); I != E;) {
+    uint64_t ActionKind =
+        cast<ConstantInt>(II->getArgOperand(I))->getZExtValue();
+    if (ActionKind == /*catch=*/1) {
+      auto *Selector = cast<Constant>(II->getArgOperand(I + 1));
+      ConstantInt *EHObjIndex = cast<ConstantInt>(II->getArgOperand(I + 2));
+      int64_t EHObjIndexVal = EHObjIndex->getSExtValue();
+      Constant *Handler = cast<Constant>(II->getArgOperand(I + 3));
+      I += 4;
+      auto CH = make_unique<CatchHandler>(/*BB=*/nullptr, Selector,
+                                          /*NextBB=*/nullptr);
+      CH->setHandlerBlockOrFunc(Handler);
+      CH->setExceptionVarIndex(EHObjIndexVal);
+      Actions.push_back(std::move(CH));
+    } else if (ActionKind == 0) {
+      Constant *Handler = cast<Constant>(II->getArgOperand(I + 1));
+      I += 2;
+      auto CH = make_unique<CleanupHandler>(/*BB=*/nullptr);
+      CH->setHandlerBlockOrFunc(Handler);
+      Actions.push_back(std::move(CH));
+    } else {
+      llvm_unreachable("Expected either a catch or cleanup handler!");
+    }
+  }
+  std::reverse(Actions.begin(), Actions.end());
+}
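+
+// As a sketch of the encoding parseEHActions decodes (the argument layout is
+// inferred from the parsing loop above; the typeinfo and handler names and
+// their signatures are illustrative, not taken from a real test case):
+//
+//   call i8* (...) @llvm.eh.actions(
+//       i32 1, i8* bitcast (i8** @typeinfo to i8*), i32 0,
+//       i8* (i8*, i8*)* @a.catch.handler,
+//       i32 0, void (i8*, i8*)* @a.cleanup.handler)
+//
+// describes a catch action (kind 1: selector, exception object frame index,
+// outlined handler) followed by a cleanup action (kind 0: outlined handler).
+// The std::reverse at the end returns the actions in the opposite order from
+// their appearance in the intrinsic's argument list.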